1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGCXXABI.h"
14 #include "CGCleanup.h"
15 #include "CGOpenMPRuntime.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/CodeGen/ConstantInitBuilder.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "clang/Basic/BitmaskEnum.h"
22 #include "llvm/ADT/ArrayRef.h"
23 #include "llvm/Bitcode/BitcodeReader.h"
24 #include "llvm/IR/DerivedTypes.h"
25 #include "llvm/IR/GlobalValue.h"
26 #include "llvm/IR/Value.h"
27 #include "llvm/Support/Format.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include <cassert>
30 
31 using namespace clang;
32 using namespace CodeGen;
33 
34 namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Build region info for a captured statement.
  /// \param CS Captured statement the region is generated for.
  /// \param RegionKind Kind of OpenMP region (outlined/inlined/target).
  /// \param CodeGen Callback that emits the body of the region.
  /// \param Kind OpenMP directive that created the region.
  /// \param HasCancel true if the region may contain a 'cancel' directive.
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Build region info without a captured statement; used by inlined
  /// regions, which reuse the captures of the enclosing region.
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit a task switching point for untied tasks; no-op by default,
  /// overridden for task-outlined and inlined regions.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// Kind of the region (outlined, task-outlined, inlined or target).
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// OpenMP directive this region was created for.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// True if the region may contain a 'cancel' directive.
  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};
96 
97 /// API for captured statement code generation in OpenMP constructs.
98 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
99 public:
100   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
101                              const RegionCodeGenTy &CodeGen,
102                              OpenMPDirectiveKind Kind, bool HasCancel,
103                              StringRef HelperName)
104       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
105                            HasCancel),
106         ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
107     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
108   }
109 
110   /// Get a variable or parameter for storing global thread id
111   /// inside OpenMP construct.
112   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
113 
114   /// Get the name of the capture helper.
115   StringRef getHelperName() const override { return HelperName; }
116 
117   static bool classof(const CGCapturedStmtInfo *Info) {
118     return CGOpenMPRegionInfo::classof(Info) &&
119            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
120                ParallelOutlinedRegion;
121   }
122 
123 private:
124   /// A variable or parameter storing global thread id for OpenMP
125   /// constructs.
126   const VarDecl *ThreadIDVar;
127   StringRef HelperName;
128 };
129 
/// API for captured statement code generation in OpenMP task-based
/// constructs outlined into a helper function.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action that emits the resume machinery required for untied
  /// tasks: a switch over the task part id, with one case per task part, so
  /// an untied task can be re-entered at the point where it was suspended.
  class UntiedTaskActionTy final : public PrePostActionTy {
    /// True if the task is untied (note: the constructor takes 'Tied').
    bool Untied;
    /// Variable holding the current task part id (accessed via pointer).
    const VarDecl *PartIDVar;
    /// Codegen sequence run at every task switching point.
    const RegionCodeGenTy UntiedCodeGen;
    /// Switch over the part id; cases are appended as parts are emitted.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        // The default destination of the switch returns from the task.
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        // Part id 0 resumes at the first task part, emitted right here.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit one task switching point: store the next part id, run the
    /// client-provided codegen, return from the task, then register the
    /// continuation block as a new case of the untied switch.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        // The id of the next part equals the current number of switch cases
        // (the matching case is added below, after the codegen callback).
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of task parts emitted so far (== number of switch cases).
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  /// Delegate untied-switch emission to the associated action.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
218 
219 /// API for inlined captured statement code generation in OpenMP
220 /// constructs.
221 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
222 public:
223   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
224                             const RegionCodeGenTy &CodeGen,
225                             OpenMPDirectiveKind Kind, bool HasCancel)
226       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
227         OldCSI(OldCSI),
228         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
229 
230   // Retrieve the value of the context parameter.
231   llvm::Value *getContextValue() const override {
232     if (OuterRegionInfo)
233       return OuterRegionInfo->getContextValue();
234     llvm_unreachable("No context value for inlined OpenMP region");
235   }
236 
237   void setContextValue(llvm::Value *V) override {
238     if (OuterRegionInfo) {
239       OuterRegionInfo->setContextValue(V);
240       return;
241     }
242     llvm_unreachable("No context value for inlined OpenMP region");
243   }
244 
245   /// Lookup the captured field decl for a variable.
246   const FieldDecl *lookup(const VarDecl *VD) const override {
247     if (OuterRegionInfo)
248       return OuterRegionInfo->lookup(VD);
249     // If there is no outer outlined region,no need to lookup in a list of
250     // captured variables, we can use the original one.
251     return nullptr;
252   }
253 
254   FieldDecl *getThisFieldDecl() const override {
255     if (OuterRegionInfo)
256       return OuterRegionInfo->getThisFieldDecl();
257     return nullptr;
258   }
259 
260   /// Get a variable or parameter for storing global thread id
261   /// inside OpenMP construct.
262   const VarDecl *getThreadIDVariable() const override {
263     if (OuterRegionInfo)
264       return OuterRegionInfo->getThreadIDVariable();
265     return nullptr;
266   }
267 
268   /// Get an LValue for the current ThreadID variable.
269   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
270     if (OuterRegionInfo)
271       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
272     llvm_unreachable("No LValue for inlined OpenMP construct");
273   }
274 
275   /// Get the name of the capture helper.
276   StringRef getHelperName() const override {
277     if (auto *OuterRegionInfo = getOldCSI())
278       return OuterRegionInfo->getHelperName();
279     llvm_unreachable("No helper name for inlined OpenMP construct");
280   }
281 
282   void emitUntiedSwitch(CodeGenFunction &CGF) override {
283     if (OuterRegionInfo)
284       OuterRegionInfo->emitUntiedSwitch(CGF);
285   }
286 
287   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
288 
289   static bool classof(const CGCapturedStmtInfo *Info) {
290     return CGOpenMPRegionInfo::classof(Info) &&
291            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
292   }
293 
294   ~CGOpenMPInlinedRegionInfo() override = default;
295 
296 private:
297   /// CodeGen info about outer OpenMP region.
298   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
299   CGOpenMPRegionInfo *OuterRegionInfo;
300 };
301 
/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of
/// the captured fields. The name of the target region has to be unique in a
/// given application so it is provided by the client, because only the
/// client has the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Client-provided, application-unique name of the target region.
  StringRef HelperName;
};
330 
/// Placeholder codegen callback for regions that must never emit a body
/// (used for expression-only captures); reaching it is a bug.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      // Locals and parameters need no privatization.
      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      // Build a reference to the global and register its address with the
      // private scope.
      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable; delegates to the
  /// enclosing inlined-region logic.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body. Never valid here: this info exists
  /// only for expression capture.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct. Never valid for expression captures.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper. Never valid for expression
  /// captures.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
393 
/// RAII for emitting code of OpenMP constructs.
/// Installs a CGOpenMPInlinedRegionInfo as the current CapturedStmtInfo and
/// clears the function's lambda/block capture state for the duration of the
/// region; the saved state is restored on destruction.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  /// Saved lambda capture map of the enclosing function.
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  /// Saved lambda 'this' capture field of the enclosing function.
  FieldDecl *LambdaThisCaptureField = nullptr;
  /// Saved block descriptor of the enclosing function.
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel)
      : CGF(CGF) {
    // Start emission for the construct. The new region info keeps a link to
    // the previous CapturedStmtInfo so the destructor can restore it.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    LambdaThisCaptureField = CGF.LambdaThisCaptureField;
    CGF.LambdaThisCaptureField = nullptr;
    BlockInfo = CGF.BlockInfo;
    CGF.BlockInfo = nullptr;
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    CGF.LambdaThisCaptureField = LambdaThisCaptureField;
    CGF.BlockInfo = BlockInfo;
  }
};
430 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive (same value as
  /// OMP_IDENT_BARRIER_IMPL, matching kmp.h).
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
459 
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

/// Reserved device ids understood by the offloading runtime.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
485 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
526 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// These use high bits so they can be combined with a base schedule value.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
558 
559 enum OpenMPRTLFunction {
560   /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
561   /// kmpc_micro microtask, ...);
562   OMPRTL__kmpc_fork_call,
563   /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
564   /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
565   OMPRTL__kmpc_threadprivate_cached,
566   /// Call to void __kmpc_threadprivate_register( ident_t *,
567   /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
568   OMPRTL__kmpc_threadprivate_register,
569   // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc);
570   OMPRTL__kmpc_global_thread_num,
571   // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
572   // kmp_critical_name *crit);
573   OMPRTL__kmpc_critical,
574   // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
575   // global_tid, kmp_critical_name *crit, uintptr_t hint);
576   OMPRTL__kmpc_critical_with_hint,
577   // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
578   // kmp_critical_name *crit);
579   OMPRTL__kmpc_end_critical,
580   // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
581   // global_tid);
582   OMPRTL__kmpc_cancel_barrier,
583   // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
584   OMPRTL__kmpc_barrier,
585   // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
586   OMPRTL__kmpc_for_static_fini,
587   // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
588   // global_tid);
589   OMPRTL__kmpc_serialized_parallel,
590   // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
591   // global_tid);
592   OMPRTL__kmpc_end_serialized_parallel,
593   // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
594   // kmp_int32 num_threads);
595   OMPRTL__kmpc_push_num_threads,
596   // Call to void __kmpc_flush(ident_t *loc);
597   OMPRTL__kmpc_flush,
598   // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
599   OMPRTL__kmpc_master,
600   // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
601   OMPRTL__kmpc_end_master,
602   // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
603   // int end_part);
604   OMPRTL__kmpc_omp_taskyield,
605   // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
606   OMPRTL__kmpc_single,
607   // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
608   OMPRTL__kmpc_end_single,
609   // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
610   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
611   // kmp_routine_entry_t *task_entry);
612   OMPRTL__kmpc_omp_task_alloc,
613   // Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *,
614   // kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
615   // size_t sizeof_shareds, kmp_routine_entry_t *task_entry,
616   // kmp_int64 device_id);
617   OMPRTL__kmpc_omp_target_task_alloc,
618   // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
619   // new_task);
620   OMPRTL__kmpc_omp_task,
621   // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
622   // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
623   // kmp_int32 didit);
624   OMPRTL__kmpc_copyprivate,
625   // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
626   // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
627   // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
628   OMPRTL__kmpc_reduce,
629   // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
630   // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
631   // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
632   // *lck);
633   OMPRTL__kmpc_reduce_nowait,
634   // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
635   // kmp_critical_name *lck);
636   OMPRTL__kmpc_end_reduce,
637   // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
638   // kmp_critical_name *lck);
639   OMPRTL__kmpc_end_reduce_nowait,
640   // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
641   // kmp_task_t * new_task);
642   OMPRTL__kmpc_omp_task_begin_if0,
643   // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
644   // kmp_task_t * new_task);
645   OMPRTL__kmpc_omp_task_complete_if0,
646   // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
647   OMPRTL__kmpc_ordered,
648   // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
649   OMPRTL__kmpc_end_ordered,
650   // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
651   // global_tid);
652   OMPRTL__kmpc_omp_taskwait,
653   // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
654   OMPRTL__kmpc_taskgroup,
655   // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
656   OMPRTL__kmpc_end_taskgroup,
657   // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
658   // int proc_bind);
659   OMPRTL__kmpc_push_proc_bind,
660   // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
661   // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
662   // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
663   OMPRTL__kmpc_omp_task_with_deps,
664   // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
665   // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
666   // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
667   OMPRTL__kmpc_omp_wait_deps,
668   // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
669   // global_tid, kmp_int32 cncl_kind);
670   OMPRTL__kmpc_cancellationpoint,
671   // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
672   // kmp_int32 cncl_kind);
673   OMPRTL__kmpc_cancel,
674   // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
675   // kmp_int32 num_teams, kmp_int32 thread_limit);
676   OMPRTL__kmpc_push_num_teams,
677   // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
678   // microtask, ...);
679   OMPRTL__kmpc_fork_teams,
680   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
681   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
682   // sched, kmp_uint64 grainsize, void *task_dup);
683   OMPRTL__kmpc_taskloop,
684   // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
685   // num_dims, struct kmp_dim *dims);
686   OMPRTL__kmpc_doacross_init,
687   // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
688   OMPRTL__kmpc_doacross_fini,
689   // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
690   // *vec);
691   OMPRTL__kmpc_doacross_post,
692   // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
693   // *vec);
694   OMPRTL__kmpc_doacross_wait,
695   // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
696   // *data);
697   OMPRTL__kmpc_task_reduction_init,
698   // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
699   // *d);
700   OMPRTL__kmpc_task_reduction_get_th_data,
701   // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
702   OMPRTL__kmpc_alloc,
703   // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
704   OMPRTL__kmpc_free,
705 
706   //
707   // Offloading related calls
708   //
709   // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
710   // size);
711   OMPRTL__kmpc_push_target_tripcount,
712   // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
713   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
714   // *arg_types);
715   OMPRTL__tgt_target,
716   // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
717   // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
718   // *arg_types);
719   OMPRTL__tgt_target_nowait,
720   // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
721   // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
722   // *arg_types, int32_t num_teams, int32_t thread_limit);
723   OMPRTL__tgt_target_teams,
724   // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
725   // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
726   // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
727   OMPRTL__tgt_target_teams_nowait,
728   // Call to void __tgt_register_requires(int64_t flags);
729   OMPRTL__tgt_register_requires,
730   // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
731   OMPRTL__tgt_register_lib,
732   // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
733   OMPRTL__tgt_unregister_lib,
734   // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
735   // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
736   OMPRTL__tgt_target_data_begin,
737   // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
738   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
739   // *arg_types);
740   OMPRTL__tgt_target_data_begin_nowait,
741   // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
742   // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
743   OMPRTL__tgt_target_data_end,
744   // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
745   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
746   // *arg_types);
747   OMPRTL__tgt_target_data_end_nowait,
748   // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
749   // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
750   OMPRTL__tgt_target_data_update,
751   // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
752   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
753   // *arg_types);
754   OMPRTL__tgt_target_data_update_nowait,
755   // Call to int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
756   OMPRTL__tgt_mapper_num_components,
757   // Call to void __tgt_push_mapper_component(void *rt_mapper_handle, void
758   // *base, void *begin, int64_t size, int64_t type);
759   OMPRTL__tgt_push_mapper_component,
760 };
761 
762 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
763 /// region.
764 class CleanupTy final : public EHScopeStack::Cleanup {
765   PrePostActionTy *Action;
766 
767 public:
768   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
769   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
770     if (!CGF.HaveInsertPoint())
771       return;
772     Action->Exit(CGF);
773   }
774 };
775 
776 } // anonymous namespace
777 
778 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
779   CodeGenFunction::RunCleanupsScope Scope(CGF);
780   if (PrePostAction) {
781     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
782     Callback(CodeGen, CGF, *PrePostAction);
783   } else {
784     PrePostActionTy Action;
785     Callback(CodeGen, CGF, Action);
786   }
787 }
788 
789 /// Check if the combiner is a call to UDR combiner and if it is so return the
790 /// UDR decl used for reduction.
791 static const OMPDeclareReductionDecl *
792 getReductionInit(const Expr *ReductionOp) {
793   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
794     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
795       if (const auto *DRE =
796               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
797         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
798           return DRD;
799   return nullptr;
800 }
801 
/// Emit initialization of a reduction private copy.
/// When \p DRD has an explicit initializer, the UDR initializer function is
/// called with the first argument of \p InitOp bound to \p Private and the
/// second bound to \p Original. Otherwise \p Private is initialized from a
/// global null constant of type \p Ty.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // <combiner, initializer> function pair; only the initializer (second)
    // is used here.
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    // InitOp is a call through an opaque callee with two arguments, each a
    // unary operator wrapping a DeclRefExpr; dig out those decl refs so the
    // decls can be remapped to the actual private/original addresses.
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Map the LHS decl to the private copy and the RHS decl to the original.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // Substitute the opaque callee with the emitted initializer function and
    // emit the call for its side effects only.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No UDR initializer: materialize a private-linkage constant global
    // holding the null value for Ty and copy it into the private storage.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    // Load the null value in whatever form matches Ty's evaluation kind.
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress());
      break;
    }
    // Wrap the loaded value in an opaque expression so EmitAnyExprToMem can
    // store it into the private address.
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
853 
/// Emit element-by-element initialization of an array of complex types.
/// \param CGF Function into which the IR is emitted.
/// \param DestAddr Address of the array being initialized.
/// \param Type Type of the array.
/// \param EmitDeclareReductionInit True when \p Init must be emitted through
///        emitInitWithReductionInitializer (UDR path).
/// \param Init Initial expression for each element.
/// \param DRD Declare-reduction declaration, or null.
/// \param SrcAddr Address of the original array; only read when \p DRD is
///        non-null.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source (UDR path only) and destination elements.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Scope any cleanups emitted while initializing this element.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the source address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the destination address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // Registering the back-edge incoming value after the branch is fine: PHI
  // operands may be added any time before verification.
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
942 
943 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
944   return CGF.EmitOMPSharedLValue(E);
945 }
946 
947 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
948                                             const Expr *E) {
949   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
950     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
951   return LValue();
952 }
953 
954 void ReductionCodeGen::emitAggregateInitialization(
955     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
956     const OMPDeclareReductionDecl *DRD) {
957   // Emit VarDecl with copy init for arrays.
958   // Get the address of the original variable captured in current
959   // captured region.
960   const auto *PrivateVD =
961       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
962   bool EmitDeclareReductionInit =
963       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
964   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
965                        EmitDeclareReductionInit,
966                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
967                                                 : PrivateVD->getInit(),
968                        DRD, SharedLVal.getAddress());
969 }
970 
971 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
972                                    ArrayRef<const Expr *> Privates,
973                                    ArrayRef<const Expr *> ReductionOps) {
974   ClausesData.reserve(Shareds.size());
975   SharedAddresses.reserve(Shareds.size());
976   Sizes.reserve(Shareds.size());
977   BaseDecls.reserve(Shareds.size());
978   auto IPriv = Privates.begin();
979   auto IRed = ReductionOps.begin();
980   for (const Expr *Ref : Shareds) {
981     ClausesData.emplace_back(Ref, *IPriv, *IRed);
982     std::advance(IPriv, 1);
983     std::advance(IRed, 1);
984   }
985 }
986 
987 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
988   assert(SharedAddresses.size() == N &&
989          "Number of generated lvalues must be exactly N.");
990   LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
991   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
992   SharedAddresses.emplace_back(First, Second);
993 }
994 
/// Compute and record the size of reduction item \p N (in chars and, for
/// variably modified types, in elements). For variably modified types the
/// VLA size expression is bound to the computed element count and the
/// private type is re-emitted so later codegen sees correct bounds.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Fixed-size item: record only the size in chars; no element count is
    // needed (nullptr).
    Sizes.emplace_back(
        CGF.getTypeSize(
            SharedAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Array section: element count is (UB - LB) + 1, using the two pointers
    // recorded by emitSharedLValue/emitSharedLValueUB.
    Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(),
                                     SharedAddresses[N].first.getPointer());
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole item: size in chars comes from the type; element count by exact
    // division by the element size.
    SizeInChars = CGF.getTypeSize(
        SharedAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the VLA size expression to the computed element count and re-emit
  // the variably modified private type.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
1032 
1033 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
1034                                          llvm::Value *Size) {
1035   const auto *PrivateVD =
1036       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1037   QualType PrivateType = PrivateVD->getType();
1038   if (!PrivateType->isVariablyModifiedType()) {
1039     assert(!Size && !Sizes[N].second &&
1040            "Size should be nullptr for non-variably modified reduction "
1041            "items.");
1042     return;
1043   }
1044   CodeGenFunction::OpaqueValueMapping OpaqueMap(
1045       CGF,
1046       cast<OpaqueValueExpr>(
1047           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
1048       RValue::get(Size));
1049   CGF.EmitVariablyModifiedType(PrivateType);
1050 }
1051 
/// Emit initialization of the private copy for reduction item \p N.
/// Arrays go through element-wise initialization; scalar items use the UDR
/// initializer when applicable; otherwise \p DefaultInit gets the first
/// chance and the private variable's own initializer is the fallback.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Re-type the raw private address to the private copy's memory type.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  // Likewise re-type the shared lvalue, preserving its base and TBAA info.
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Array item: element-by-element initialization.
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar with a UDR initializer (or without its own init): use the UDR.
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // DefaultInit declined (returned false): emit the private variable's own
    // non-trivial initializer into the private storage.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
1082 
1083 bool ReductionCodeGen::needCleanups(unsigned N) {
1084   const auto *PrivateVD =
1085       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1086   QualType PrivateType = PrivateVD->getType();
1087   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1088   return DTorKind != QualType::DK_none;
1089 }
1090 
1091 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
1092                                     Address PrivateAddr) {
1093   const auto *PrivateVD =
1094       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1095   QualType PrivateType = PrivateVD->getType();
1096   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1097   if (needCleanups(N)) {
1098     PrivateAddr = CGF.Builder.CreateElementBitCast(
1099         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1100     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
1101   }
1102 }
1103 
/// Walk pointer/reference levels from \p BaseTy down toward \p ElTy, loading
/// through each level, and return an lvalue for the resulting address,
/// element-bitcast to \p ElTy's memory type. Base/TBAA info of the final
/// lvalue is carried over from the last loaded level.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  // Stop when either the element type is reached or the current type is no
  // longer a pointer/reference.
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    } else {
      // Reference level: load through the reference.
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
1123 
/// Present \p Addr as a value of (possibly multi-level pointer) type
/// \p BaseTy. For every pointer/reference level between \p BaseTy and
/// \p ElTy a memory temporary is created; each outer temporary stores the
/// pointer of the next inner one, the innermost stores the casted \p Addr,
/// and the outermost temporary is returned. If no levels are needed,
/// \p Addr is returned directly with \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();        // innermost temporary so far
  Address TopTmp = Address::invalid();     // previously created temporary
  Address MostTopTmp = Address::invalid(); // outermost temporary (returned)
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    // Store the casted address into the innermost temporary and hand back
    // the head of the chain.
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  // No indirection levels were required: return the casted address directly.
  return Address(Addr, BaseLVAlignment);
}
1151 
1152 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
1153   const VarDecl *OrigVD = nullptr;
1154   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
1155     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
1156     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
1157       Base = TempOASE->getBase()->IgnoreParenImpCasts();
1158     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1159       Base = TempASE->getBase()->IgnoreParenImpCasts();
1160     DE = cast<DeclRefExpr>(Base);
1161     OrigVD = cast<VarDecl>(DE->getDecl());
1162   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
1163     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
1164     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1165       Base = TempASE->getBase()->IgnoreParenImpCasts();
1166     DE = cast<DeclRefExpr>(Base);
1167     OrigVD = cast<VarDecl>(DE->getDecl());
1168   }
1169   return OrigVD;
1170 }
1171 
/// Adjust the private address of reduction item \p N when the item is
/// addressed through a base variable (array section/subscript): apply to the
/// private copy the same pointer distance the shared item has from the
/// beginning of its base, then rebuild the pointer chain expected by the
/// base variable's type. Plain variable references are returned unchanged.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    // Lvalue of the base variable itself.
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    // Load through indirections until the shared item's type is reached.
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    // Signed element distance between the begin-of-base pointer and the
    // shared item's pointer.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress().getType());
    // Apply the same distance to the private copy.
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress().getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  // Plain variable reference: no adjustment required.
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1197 
1198 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1199   const OMPDeclareReductionDecl *DRD =
1200       getReductionInit(ClausesData[N].ReductionOp);
1201   return DRD && DRD->getInitializer();
1202 }
1203 
1204 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1205   return CGF.EmitLoadOfPointerLValue(
1206       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1207       getThreadIDVariable()->getType()->castAs<PointerType>());
1208 }
1209 
/// Emit the body of an OpenMP region inside a terminate scope, so that no
/// exception can escape the structured block.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  // Nothing to emit without an insertion point.
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1222 
1223 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1224     CodeGenFunction &CGF) {
1225   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1226                             getThreadIDVariable()->getType(),
1227                             AlignmentSource::Decl);
1228 }
1229 
1230 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1231                                        QualType FieldTy) {
1232   auto *Field = FieldDecl::Create(
1233       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1234       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1235       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1236   Field->setAccess(AS_public);
1237   DC->addDecl(Field);
1238   return Field;
1239 }
1240 
/// Build the runtime helper state: the implicit 'ident_t' record type used
/// as the location argument of runtime entry points, the critical-name array
/// type, and any previously recorded offloading metadata.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OffloadEntriesInfoManager(CGM) {
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("ident_t");
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  RD->startDefinition();
  // Fields are added in declaration order; this order is what gives the
  // record its layout.
  // reserved_1
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // flags
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_2
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_3
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // psource
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  RD->completeDefinition();
  IdentQTy = C.getRecordType(RD);
  IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
  // Critical-section names are arrays of 8 x i32.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  loadOffloadInfoMetadata();
}
1266 
1267 void CGOpenMPRuntime::clear() {
1268   InternalVars.clear();
1269   // Clean non-target variable declarations possibly used only in debug info.
1270   for (const auto &Data : EmittedNonTargetVariables) {
1271     if (!Data.getValue().pointsToAliveValue())
1272       continue;
1273     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1274     if (!GV)
1275       continue;
1276     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1277       continue;
1278     GV->eraseFromParent();
1279   }
1280 }
1281 
1282 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1283   SmallString<128> Buffer;
1284   llvm::raw_svector_ostream OS(Buffer);
1285   StringRef Sep = FirstSeparator;
1286   for (StringRef Part : Parts) {
1287     OS << Sep << Part;
1288     Sep = Separator;
1289   }
1290   return OS.str();
1291 }
1292 
/// Emit the outlined helper for a UDR combiner or initializer:
///   void <name>(Ty *omp_out_parm, Ty *omp_in_parm);
/// \p In and \p Out are remapped onto the two pointer parameters. When this
/// is an initializer and \p CombinerInitializer is null, \p Out's own
/// initializer is emitted instead of a call expression.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  // Out parameter is pushed first, then In.
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // When optimizing, allow and encourage inlining of this small helper.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  (void)Scope.Privatize();
  // Initializer without a call expression: emit Out's own initializer.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1349 
1350 void CGOpenMPRuntime::emitUserDefinedReduction(
1351     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1352   if (UDRMap.count(D) > 0)
1353     return;
1354   llvm::Function *Combiner = emitCombinerOrInitializer(
1355       CGM, D->getType(), D->getCombiner(),
1356       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1357       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1358       /*IsCombiner=*/true);
1359   llvm::Function *Initializer = nullptr;
1360   if (const Expr *Init = D->getInitializer()) {
1361     Initializer = emitCombinerOrInitializer(
1362         CGM, D->getType(),
1363         D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1364                                                                      : nullptr,
1365         cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1366         cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1367         /*IsCombiner=*/false);
1368   }
1369   UDRMap.try_emplace(D, Combiner, Initializer);
1370   if (CGF) {
1371     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1372     Decls.second.push_back(D);
1373   }
1374 }
1375 
1376 std::pair<llvm::Function *, llvm::Function *>
1377 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1378   auto I = UDRMap.find(D);
1379   if (I != UDRMap.end())
1380     return I->second;
1381   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1382   return UDRMap.lookup(D);
1383 }
1384 
1385 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1386     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1387     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1388     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1389   assert(ThreadIDVar->getType()->isPointerType() &&
1390          "thread id variable must be of type kmp_int32 *");
1391   CodeGenFunction CGF(CGM, true);
1392   bool HasCancel = false;
1393   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1394     HasCancel = OPD->hasCancel();
1395   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1396     HasCancel = OPSD->hasCancel();
1397   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1398     HasCancel = OPFD->hasCancel();
1399   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1400     HasCancel = OPFD->hasCancel();
1401   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1402     HasCancel = OPFD->hasCancel();
1403   else if (const auto *OPFD =
1404                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1405     HasCancel = OPFD->hasCancel();
1406   else if (const auto *OPFD =
1407                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1408     HasCancel = OPFD->hasCancel();
1409   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1410                                     HasCancel, OutlinedHelperName);
1411   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1412   return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
1413 }
1414 
1415 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1416     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1417     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1418   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1419   return emitParallelOrTeamsOutlinedFunction(
1420       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1421 }
1422 
1423 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1424     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1425     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1426   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1427   return emitParallelOrTeamsOutlinedFunction(
1428       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1429 }
1430 
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Codegen callback used for untied tasks: re-enqueue the task via
  // __kmpc_omp_task so execution can resume at the next task part.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    // Args: ident_t *loc, kmp_int32 gtid, kmp_task_t *task (loaded from the
    // captured task_t pointer variable).
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer()};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
  };
  // The action tracks the part id and (for untied tasks) splits the body into
  // numbered parts; it must be attached to CodeGen before outlining starts.
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  // Taskloop-based directives capture their body under OMPD_taskloop rather
  // than OMPD_task.
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Only a plain 'task' directive can carry a 'cancel' region.
  const auto *TD = dyn_cast<OMPTaskDirective>(&D);
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind,
                                        TD ? TD->hasCancel() : false, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // The part count is only meaningful (and only computed) for untied tasks.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1467 
1468 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1469                              const RecordDecl *RD, const CGRecordLayout &RL,
1470                              ArrayRef<llvm::Constant *> Data) {
1471   llvm::StructType *StructTy = RL.getLLVMType();
1472   unsigned PrevIdx = 0;
1473   ConstantInitBuilder CIBuilder(CGM);
1474   auto DI = Data.begin();
1475   for (const FieldDecl *FD : RD->fields()) {
1476     unsigned Idx = RL.getLLVMFieldNo(FD);
1477     // Fill the alignment.
1478     for (unsigned I = PrevIdx; I < Idx; ++I)
1479       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1480     PrevIdx = Idx + 1;
1481     Fields.add(*DI);
1482     ++DI;
1483   }
1484 }
1485 
1486 template <class... As>
1487 static llvm::GlobalVariable *
1488 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1489                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1490                    As &&... Args) {
1491   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1492   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1493   ConstantInitBuilder CIBuilder(CGM);
1494   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1495   buildStructValue(Fields, CGM, RD, RL, Data);
1496   return Fields.finishAndCreateGlobal(
1497       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1498       std::forward<As>(Args)...);
1499 }
1500 
1501 template <typename T>
1502 static void
1503 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1504                                          ArrayRef<llvm::Constant *> Data,
1505                                          T &Parent) {
1506   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1507   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1508   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1509   buildStructValue(Fields, CGM, RD, RL, Data);
1510   Fields.finishAndAddTo(Parent);
1511 }
1512 
// Return (creating on first use) the default ident_t global for the given
// flags. Results are cached per (Flags, Reserved2Flags) pair in
// OpenMPDefaultLocMap.
Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  FlagsTy FlagsKey(Flags, Reserved2Flags);
  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
  if (!Entry) {
    if (!DefaultOpenMPPSource) {
      // Initialize default location for psource field of ident_t structure of
      // all ident_t objects. Format is ";file;function;line;column;;".
      // Taken from
      // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
      DefaultOpenMPPSource =
          CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
      // The string global is an [N x i8]*; cast to i8* for the psource field.
      DefaultOpenMPPSource =
          llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
    }

    // ident_t fields: reserved_1, flags, reserved_2, reserved_3, psource.
    llvm::Constant *Data[] = {
        llvm::ConstantInt::getNullValue(CGM.Int32Ty),
        llvm::ConstantInt::get(CGM.Int32Ty, Flags),
        llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
        llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
    llvm::GlobalValue *DefaultOpenMPLocation =
        createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
                           llvm::GlobalValue::PrivateLinkage);
    // Address identity is irrelevant to the runtime; allow merging.
    DefaultOpenMPLocation->setUnnamedAddr(
        llvm::GlobalValue::UnnamedAddr::Global);

    OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
  }
  return Address(Entry, Align);
}
1545 
1546 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1547                                              bool AtCurrentPoint) {
1548   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1549   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1550 
1551   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1552   if (AtCurrentPoint) {
1553     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1554         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1555   } else {
1556     Elem.second.ServiceInsertPt =
1557         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1558     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1559   }
1560 }
1561 
1562 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1563   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1564   if (Elem.second.ServiceInsertPt) {
1565     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1566     Elem.second.ServiceInsertPt = nullptr;
1567     Ptr->eraseFromParent();
1568   }
1569 }
1570 
// Emit (or reuse) an ident_t* describing source location Loc for runtime
// calls. Without debug info (or with an invalid Loc) this returns a shared
// constant global; otherwise a per-function ident_t alloca is filled in and
// its psource field updated for this Loc.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  Address LocValue = Address::invalid();
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, Align);

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // GetOpenMPThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    // Initialize the temp once, at the service insert point, by copying the
    // default ident_t; only psource varies per call site below.
    if (!Elem.second.ServiceInsertPt)
      setLocThreadIdInsertPt(CGF);
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGF.getTypeSize(IdentQTy));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
  LValue PSource =
      CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));

  // Location strings are cached per raw source-location encoding.
  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      OS2 << FD->getQualifiedNameAsString();
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}
1631 
// Return the OpenMP global thread id for the current function, preferring
// (in order): a value cached in OpenMPLocThreadIDMap, the thread-id argument
// of an enclosing outlined region, and finally an emitted call to
// __kmpc_global_thread_num.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
      !CGF.getLangOpts().CXXExceptions ||
      CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
    if (auto *OMPRegionInfo =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
      if (OMPRegionInfo->getThreadIDVariable()) {
        // Check if this an outlined function with thread id passed as argument.
        LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the call at the service insert point (near function entry) so the
  // cached value dominates all uses.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1682 
1683 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1684   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1685   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1686     clearLocThreadIdInsertPt(CGF);
1687     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1688   }
1689   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1690     for(auto *D : FunctionUDRMap[CGF.CurFn])
1691       UDRMap.erase(D);
1692     FunctionUDRMap.erase(CGF.CurFn);
1693   }
1694   auto I = FunctionUDMMap.find(CGF.CurFn);
1695   if (I != FunctionUDMMap.end()) {
1696     for(auto *D : I->second)
1697       UDMMap.erase(D);
1698     FunctionUDMMap.erase(I);
1699   }
1700 }
1701 
1702 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1703   return IdentTy->getPointerTo();
1704 }
1705 
1706 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1707   if (!Kmpc_MicroTy) {
1708     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1709     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1710                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1711     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1712   }
1713   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1714 }
1715 
1716 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1717   llvm::FunctionCallee RTLFn = nullptr;
1718   switch (static_cast<OpenMPRTLFunction>(Function)) {
1719   case OMPRTL__kmpc_fork_call: {
1720     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1721     // microtask, ...);
1722     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1723                                 getKmpc_MicroPointerTy()};
1724     auto *FnTy =
1725         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1726     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1727     if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
1728       if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
1729         llvm::LLVMContext &Ctx = F->getContext();
1730         llvm::MDBuilder MDB(Ctx);
1731         // Annotate the callback behavior of the __kmpc_fork_call:
1732         //  - The callback callee is argument number 2 (microtask).
1733         //  - The first two arguments of the callback callee are unknown (-1).
1734         //  - All variadic arguments to the __kmpc_fork_call are passed to the
1735         //    callback callee.
1736         F->addMetadata(
1737             llvm::LLVMContext::MD_callback,
1738             *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
1739                                         2, {-1, -1},
1740                                         /* VarArgsArePassed */ true)}));
1741       }
1742     }
1743     break;
1744   }
1745   case OMPRTL__kmpc_global_thread_num: {
1746     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1747     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1748     auto *FnTy =
1749         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1750     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1751     break;
1752   }
1753   case OMPRTL__kmpc_threadprivate_cached: {
1754     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1755     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1756     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1757                                 CGM.VoidPtrTy, CGM.SizeTy,
1758                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1759     auto *FnTy =
1760         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1761     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1762     break;
1763   }
1764   case OMPRTL__kmpc_critical: {
1765     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1766     // kmp_critical_name *crit);
1767     llvm::Type *TypeParams[] = {
1768         getIdentTyPointerTy(), CGM.Int32Ty,
1769         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1770     auto *FnTy =
1771         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1772     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1773     break;
1774   }
1775   case OMPRTL__kmpc_critical_with_hint: {
1776     // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1777     // kmp_critical_name *crit, uintptr_t hint);
1778     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1779                                 llvm::PointerType::getUnqual(KmpCriticalNameTy),
1780                                 CGM.IntPtrTy};
1781     auto *FnTy =
1782         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1783     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1784     break;
1785   }
1786   case OMPRTL__kmpc_threadprivate_register: {
1787     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1788     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1789     // typedef void *(*kmpc_ctor)(void *);
1790     auto *KmpcCtorTy =
1791         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1792                                 /*isVarArg*/ false)->getPointerTo();
1793     // typedef void *(*kmpc_cctor)(void *, void *);
1794     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1795     auto *KmpcCopyCtorTy =
1796         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1797                                 /*isVarArg*/ false)
1798             ->getPointerTo();
1799     // typedef void (*kmpc_dtor)(void *);
1800     auto *KmpcDtorTy =
1801         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1802             ->getPointerTo();
1803     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1804                               KmpcCopyCtorTy, KmpcDtorTy};
1805     auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1806                                         /*isVarArg*/ false);
1807     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1808     break;
1809   }
1810   case OMPRTL__kmpc_end_critical: {
1811     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1812     // kmp_critical_name *crit);
1813     llvm::Type *TypeParams[] = {
1814         getIdentTyPointerTy(), CGM.Int32Ty,
1815         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1816     auto *FnTy =
1817         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1818     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1819     break;
1820   }
1821   case OMPRTL__kmpc_cancel_barrier: {
1822     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1823     // global_tid);
1824     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1825     auto *FnTy =
1826         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1827     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1828     break;
1829   }
1830   case OMPRTL__kmpc_barrier: {
1831     // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1832     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1833     auto *FnTy =
1834         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1835     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1836     break;
1837   }
1838   case OMPRTL__kmpc_for_static_fini: {
1839     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1840     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1841     auto *FnTy =
1842         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1843     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1844     break;
1845   }
1846   case OMPRTL__kmpc_push_num_threads: {
1847     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1848     // kmp_int32 num_threads)
1849     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1850                                 CGM.Int32Ty};
1851     auto *FnTy =
1852         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1853     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1854     break;
1855   }
1856   case OMPRTL__kmpc_serialized_parallel: {
1857     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1858     // global_tid);
1859     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1860     auto *FnTy =
1861         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1862     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1863     break;
1864   }
1865   case OMPRTL__kmpc_end_serialized_parallel: {
1866     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1867     // global_tid);
1868     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1869     auto *FnTy =
1870         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1871     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1872     break;
1873   }
1874   case OMPRTL__kmpc_flush: {
1875     // Build void __kmpc_flush(ident_t *loc);
1876     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1877     auto *FnTy =
1878         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1879     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1880     break;
1881   }
1882   case OMPRTL__kmpc_master: {
1883     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1884     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1885     auto *FnTy =
1886         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1887     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
1888     break;
1889   }
1890   case OMPRTL__kmpc_end_master: {
1891     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
1892     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1893     auto *FnTy =
1894         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1895     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
1896     break;
1897   }
1898   case OMPRTL__kmpc_omp_taskyield: {
1899     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
1900     // int end_part);
1901     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1902     auto *FnTy =
1903         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1904     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
1905     break;
1906   }
1907   case OMPRTL__kmpc_single: {
1908     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
1909     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1910     auto *FnTy =
1911         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1912     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
1913     break;
1914   }
1915   case OMPRTL__kmpc_end_single: {
1916     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
1917     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1918     auto *FnTy =
1919         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1920     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
1921     break;
1922   }
1923   case OMPRTL__kmpc_omp_task_alloc: {
1924     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
1925     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1926     // kmp_routine_entry_t *task_entry);
1927     assert(KmpRoutineEntryPtrTy != nullptr &&
1928            "Type kmp_routine_entry_t must be created.");
1929     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1930                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
1931     // Return void * and then cast to particular kmp_task_t type.
1932     auto *FnTy =
1933         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1934     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
1935     break;
1936   }
1937   case OMPRTL__kmpc_omp_target_task_alloc: {
1938     // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32 gtid,
1939     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1940     // kmp_routine_entry_t *task_entry, kmp_int64 device_id);
1941     assert(KmpRoutineEntryPtrTy != nullptr &&
1942            "Type kmp_routine_entry_t must be created.");
1943     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1944                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy,
1945                                 CGM.Int64Ty};
1946     // Return void * and then cast to particular kmp_task_t type.
1947     auto *FnTy =
1948         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1949     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_target_task_alloc");
1950     break;
1951   }
1952   case OMPRTL__kmpc_omp_task: {
1953     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1954     // *new_task);
1955     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1956                                 CGM.VoidPtrTy};
1957     auto *FnTy =
1958         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1959     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
1960     break;
1961   }
1962   case OMPRTL__kmpc_copyprivate: {
1963     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
1964     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
1965     // kmp_int32 didit);
1966     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1967     auto *CpyFnTy =
1968         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
1969     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
1970                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
1971                                 CGM.Int32Ty};
1972     auto *FnTy =
1973         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1974     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
1975     break;
1976   }
1977   case OMPRTL__kmpc_reduce: {
1978     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
1979     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
1980     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
1981     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1982     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1983                                                /*isVarArg=*/false);
1984     llvm::Type *TypeParams[] = {
1985         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1986         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1987         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1988     auto *FnTy =
1989         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1990     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
1991     break;
1992   }
1993   case OMPRTL__kmpc_reduce_nowait: {
1994     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
1995     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
1996     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
1997     // *lck);
1998     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1999     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
2000                                                /*isVarArg=*/false);
2001     llvm::Type *TypeParams[] = {
2002         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
2003         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
2004         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2005     auto *FnTy =
2006         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2007     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
2008     break;
2009   }
2010   case OMPRTL__kmpc_end_reduce: {
2011     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
2012     // kmp_critical_name *lck);
2013     llvm::Type *TypeParams[] = {
2014         getIdentTyPointerTy(), CGM.Int32Ty,
2015         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2016     auto *FnTy =
2017         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2018     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
2019     break;
2020   }
2021   case OMPRTL__kmpc_end_reduce_nowait: {
2022     // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
2023     // kmp_critical_name *lck);
2024     llvm::Type *TypeParams[] = {
2025         getIdentTyPointerTy(), CGM.Int32Ty,
2026         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2027     auto *FnTy =
2028         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2029     RTLFn =
2030         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
2031     break;
2032   }
2033   case OMPRTL__kmpc_omp_task_begin_if0: {
2034     // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2035     // *new_task);
2036     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2037                                 CGM.VoidPtrTy};
2038     auto *FnTy =
2039         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2040     RTLFn =
2041         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
2042     break;
2043   }
2044   case OMPRTL__kmpc_omp_task_complete_if0: {
2045     // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2046     // *new_task);
2047     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2048                                 CGM.VoidPtrTy};
2049     auto *FnTy =
2050         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2051     RTLFn = CGM.CreateRuntimeFunction(FnTy,
2052                                       /*Name=*/"__kmpc_omp_task_complete_if0");
2053     break;
2054   }
2055   case OMPRTL__kmpc_ordered: {
2056     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
2057     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2058     auto *FnTy =
2059         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2060     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
2061     break;
2062   }
2063   case OMPRTL__kmpc_end_ordered: {
2064     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
2065     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2066     auto *FnTy =
2067         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2068     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
2069     break;
2070   }
2071   case OMPRTL__kmpc_omp_taskwait: {
2072     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
2073     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2074     auto *FnTy =
2075         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2076     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
2077     break;
2078   }
2079   case OMPRTL__kmpc_taskgroup: {
2080     // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
2081     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2082     auto *FnTy =
2083         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2084     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
2085     break;
2086   }
2087   case OMPRTL__kmpc_end_taskgroup: {
2088     // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
2089     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2090     auto *FnTy =
2091         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2092     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
2093     break;
2094   }
2095   case OMPRTL__kmpc_push_proc_bind: {
2096     // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
2097     // int proc_bind)
2098     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2099     auto *FnTy =
2100         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2101     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
2102     break;
2103   }
2104   case OMPRTL__kmpc_omp_task_with_deps: {
2105     // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
2106     // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
2107     // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
2108     llvm::Type *TypeParams[] = {
2109         getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
2110         CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
2111     auto *FnTy =
2112         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2113     RTLFn =
2114         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
2115     break;
2116   }
2117   case OMPRTL__kmpc_omp_wait_deps: {
2118     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
2119     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
2120     // kmp_depend_info_t *noalias_dep_list);
2121     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2122                                 CGM.Int32Ty,           CGM.VoidPtrTy,
2123                                 CGM.Int32Ty,           CGM.VoidPtrTy};
2124     auto *FnTy =
2125         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2126     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
2127     break;
2128   }
2129   case OMPRTL__kmpc_cancellationpoint: {
2130     // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
2131     // global_tid, kmp_int32 cncl_kind)
2132     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2133     auto *FnTy =
2134         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2135     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
2136     break;
2137   }
2138   case OMPRTL__kmpc_cancel: {
2139     // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
2140     // kmp_int32 cncl_kind)
2141     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2142     auto *FnTy =
2143         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2144     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
2145     break;
2146   }
2147   case OMPRTL__kmpc_push_num_teams: {
2148     // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid,
2149     // kmp_int32 num_teams, kmp_int32 num_threads)
2150     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2151         CGM.Int32Ty};
2152     auto *FnTy =
2153         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2154     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
2155     break;
2156   }
2157   case OMPRTL__kmpc_fork_teams: {
2158     // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
2159     // microtask, ...);
2160     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2161                                 getKmpc_MicroPointerTy()};
2162     auto *FnTy =
2163         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
2164     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
2165     if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
2166       if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
2167         llvm::LLVMContext &Ctx = F->getContext();
2168         llvm::MDBuilder MDB(Ctx);
2169         // Annotate the callback behavior of the __kmpc_fork_teams:
2170         //  - The callback callee is argument number 2 (microtask).
2171         //  - The first two arguments of the callback callee are unknown (-1).
2172         //  - All variadic arguments to the __kmpc_fork_teams are passed to the
2173         //    callback callee.
2174         F->addMetadata(
2175             llvm::LLVMContext::MD_callback,
2176             *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
2177                                         2, {-1, -1},
2178                                         /* VarArgsArePassed */ true)}));
2179       }
2180     }
2181     break;
2182   }
2183   case OMPRTL__kmpc_taskloop: {
2184     // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
2185     // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
2186     // sched, kmp_uint64 grainsize, void *task_dup);
2187     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2188                                 CGM.IntTy,
2189                                 CGM.VoidPtrTy,
2190                                 CGM.IntTy,
2191                                 CGM.Int64Ty->getPointerTo(),
2192                                 CGM.Int64Ty->getPointerTo(),
2193                                 CGM.Int64Ty,
2194                                 CGM.IntTy,
2195                                 CGM.IntTy,
2196                                 CGM.Int64Ty,
2197                                 CGM.VoidPtrTy};
2198     auto *FnTy =
2199         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2200     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
2201     break;
2202   }
2203   case OMPRTL__kmpc_doacross_init: {
2204     // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
2205     // num_dims, struct kmp_dim *dims);
2206     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2207                                 CGM.Int32Ty,
2208                                 CGM.Int32Ty,
2209                                 CGM.VoidPtrTy};
2210     auto *FnTy =
2211         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2212     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
2213     break;
2214   }
2215   case OMPRTL__kmpc_doacross_fini: {
2216     // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
2217     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2218     auto *FnTy =
2219         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2220     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
2221     break;
2222   }
2223   case OMPRTL__kmpc_doacross_post: {
2224     // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
2225     // *vec);
2226     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2227                                 CGM.Int64Ty->getPointerTo()};
2228     auto *FnTy =
2229         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2230     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
2231     break;
2232   }
2233   case OMPRTL__kmpc_doacross_wait: {
2234     // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
2235     // *vec);
2236     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2237                                 CGM.Int64Ty->getPointerTo()};
2238     auto *FnTy =
2239         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2240     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
2241     break;
2242   }
2243   case OMPRTL__kmpc_task_reduction_init: {
2244     // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
2245     // *data);
2246     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
2247     auto *FnTy =
2248         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2249     RTLFn =
2250         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
2251     break;
2252   }
2253   case OMPRTL__kmpc_task_reduction_get_th_data: {
2254     // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
2255     // *d);
2256     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2257     auto *FnTy =
2258         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2259     RTLFn = CGM.CreateRuntimeFunction(
2260         FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
2261     break;
2262   }
2263   case OMPRTL__kmpc_alloc: {
2264     // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t
2265     // al); omp_allocator_handle_t type is void *.
2266     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy};
2267     auto *FnTy =
2268         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2269     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc");
2270     break;
2271   }
2272   case OMPRTL__kmpc_free: {
2273     // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t
2274     // al); omp_allocator_handle_t type is void *.
2275     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2276     auto *FnTy =
2277         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2278     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free");
2279     break;
2280   }
2281   case OMPRTL__kmpc_push_target_tripcount: {
2282     // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
2283     // size);
2284     llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty};
2285     llvm::FunctionType *FnTy =
2286         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2287     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount");
2288     break;
2289   }
2290   case OMPRTL__tgt_target: {
2291     // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
2292     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2293     // *arg_types);
2294     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2295                                 CGM.VoidPtrTy,
2296                                 CGM.Int32Ty,
2297                                 CGM.VoidPtrPtrTy,
2298                                 CGM.VoidPtrPtrTy,
2299                                 CGM.Int64Ty->getPointerTo(),
2300                                 CGM.Int64Ty->getPointerTo()};
2301     auto *FnTy =
2302         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2303     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
2304     break;
2305   }
2306   case OMPRTL__tgt_target_nowait: {
2307     // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
2308     // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2309     // int64_t *arg_types);
2310     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2311                                 CGM.VoidPtrTy,
2312                                 CGM.Int32Ty,
2313                                 CGM.VoidPtrPtrTy,
2314                                 CGM.VoidPtrPtrTy,
2315                                 CGM.Int64Ty->getPointerTo(),
2316                                 CGM.Int64Ty->getPointerTo()};
2317     auto *FnTy =
2318         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2319     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
2320     break;
2321   }
2322   case OMPRTL__tgt_target_teams: {
2323     // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
2324     // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2325     // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2326     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2327                                 CGM.VoidPtrTy,
2328                                 CGM.Int32Ty,
2329                                 CGM.VoidPtrPtrTy,
2330                                 CGM.VoidPtrPtrTy,
2331                                 CGM.Int64Ty->getPointerTo(),
2332                                 CGM.Int64Ty->getPointerTo(),
2333                                 CGM.Int32Ty,
2334                                 CGM.Int32Ty};
2335     auto *FnTy =
2336         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2337     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
2338     break;
2339   }
2340   case OMPRTL__tgt_target_teams_nowait: {
2341     // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
2342     // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
2343     // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2344     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2345                                 CGM.VoidPtrTy,
2346                                 CGM.Int32Ty,
2347                                 CGM.VoidPtrPtrTy,
2348                                 CGM.VoidPtrPtrTy,
2349                                 CGM.Int64Ty->getPointerTo(),
2350                                 CGM.Int64Ty->getPointerTo(),
2351                                 CGM.Int32Ty,
2352                                 CGM.Int32Ty};
2353     auto *FnTy =
2354         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2355     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
2356     break;
2357   }
2358   case OMPRTL__tgt_register_requires: {
2359     // Build void __tgt_register_requires(int64_t flags);
2360     llvm::Type *TypeParams[] = {CGM.Int64Ty};
2361     auto *FnTy =
2362         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2363     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires");
2364     break;
2365   }
2366   case OMPRTL__tgt_register_lib: {
2367     // Build void __tgt_register_lib(__tgt_bin_desc *desc);
2368     QualType ParamTy =
2369         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2370     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2371     auto *FnTy =
2372         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2373     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
2374     break;
2375   }
2376   case OMPRTL__tgt_unregister_lib: {
2377     // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
2378     QualType ParamTy =
2379         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2380     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2381     auto *FnTy =
2382         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2383     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
2384     break;
2385   }
2386   case OMPRTL__tgt_target_data_begin: {
2387     // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
2388     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2389     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2390                                 CGM.Int32Ty,
2391                                 CGM.VoidPtrPtrTy,
2392                                 CGM.VoidPtrPtrTy,
2393                                 CGM.Int64Ty->getPointerTo(),
2394                                 CGM.Int64Ty->getPointerTo()};
2395     auto *FnTy =
2396         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2397     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
2398     break;
2399   }
2400   case OMPRTL__tgt_target_data_begin_nowait: {
2401     // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
2402     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2403     // *arg_types);
2404     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2405                                 CGM.Int32Ty,
2406                                 CGM.VoidPtrPtrTy,
2407                                 CGM.VoidPtrPtrTy,
2408                                 CGM.Int64Ty->getPointerTo(),
2409                                 CGM.Int64Ty->getPointerTo()};
2410     auto *FnTy =
2411         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2412     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
2413     break;
2414   }
2415   case OMPRTL__tgt_target_data_end: {
2416     // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
2417     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2418     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2419                                 CGM.Int32Ty,
2420                                 CGM.VoidPtrPtrTy,
2421                                 CGM.VoidPtrPtrTy,
2422                                 CGM.Int64Ty->getPointerTo(),
2423                                 CGM.Int64Ty->getPointerTo()};
2424     auto *FnTy =
2425         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2426     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
2427     break;
2428   }
2429   case OMPRTL__tgt_target_data_end_nowait: {
2430     // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
2431     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2432     // *arg_types);
2433     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2434                                 CGM.Int32Ty,
2435                                 CGM.VoidPtrPtrTy,
2436                                 CGM.VoidPtrPtrTy,
2437                                 CGM.Int64Ty->getPointerTo(),
2438                                 CGM.Int64Ty->getPointerTo()};
2439     auto *FnTy =
2440         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2441     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
2442     break;
2443   }
2444   case OMPRTL__tgt_target_data_update: {
2445     // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
2446     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2447     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2448                                 CGM.Int32Ty,
2449                                 CGM.VoidPtrPtrTy,
2450                                 CGM.VoidPtrPtrTy,
2451                                 CGM.Int64Ty->getPointerTo(),
2452                                 CGM.Int64Ty->getPointerTo()};
2453     auto *FnTy =
2454         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2455     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
2456     break;
2457   }
2458   case OMPRTL__tgt_target_data_update_nowait: {
2459     // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
2460     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2461     // *arg_types);
2462     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2463                                 CGM.Int32Ty,
2464                                 CGM.VoidPtrPtrTy,
2465                                 CGM.VoidPtrPtrTy,
2466                                 CGM.Int64Ty->getPointerTo(),
2467                                 CGM.Int64Ty->getPointerTo()};
2468     auto *FnTy =
2469         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2470     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
2471     break;
2472   }
2473   case OMPRTL__tgt_mapper_num_components: {
2474     // Build int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
2475     llvm::Type *TypeParams[] = {CGM.VoidPtrTy};
2476     auto *FnTy =
2477         llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false);
2478     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_mapper_num_components");
2479     break;
2480   }
2481   case OMPRTL__tgt_push_mapper_component: {
2482     // Build void __tgt_push_mapper_component(void *rt_mapper_handle, void
2483     // *base, void *begin, int64_t size, int64_t type);
2484     llvm::Type *TypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.VoidPtrTy,
2485                                 CGM.Int64Ty, CGM.Int64Ty};
2486     auto *FnTy =
2487         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2488     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_push_mapper_component");
2489     break;
2490   }
2491   }
2492   assert(RTLFn && "Unable to find OpenMP runtime function");
2493   return RTLFn;
2494 }
2495 
2496 llvm::FunctionCallee
2497 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
2498   assert((IVSize == 32 || IVSize == 64) &&
2499          "IV size is not compatible with the omp runtime");
2500   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
2501                                             : "__kmpc_for_static_init_4u")
2502                                 : (IVSigned ? "__kmpc_for_static_init_8"
2503                                             : "__kmpc_for_static_init_8u");
2504   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2505   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2506   llvm::Type *TypeParams[] = {
2507     getIdentTyPointerTy(),                     // loc
2508     CGM.Int32Ty,                               // tid
2509     CGM.Int32Ty,                               // schedtype
2510     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2511     PtrTy,                                     // p_lower
2512     PtrTy,                                     // p_upper
2513     PtrTy,                                     // p_stride
2514     ITy,                                       // incr
2515     ITy                                        // chunk
2516   };
2517   auto *FnTy =
2518       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2519   return CGM.CreateRuntimeFunction(FnTy, Name);
2520 }
2521 
2522 llvm::FunctionCallee
2523 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
2524   assert((IVSize == 32 || IVSize == 64) &&
2525          "IV size is not compatible with the omp runtime");
2526   StringRef Name =
2527       IVSize == 32
2528           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
2529           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
2530   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2531   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
2532                                CGM.Int32Ty,           // tid
2533                                CGM.Int32Ty,           // schedtype
2534                                ITy,                   // lower
2535                                ITy,                   // upper
2536                                ITy,                   // stride
2537                                ITy                    // chunk
2538   };
2539   auto *FnTy =
2540       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2541   return CGM.CreateRuntimeFunction(FnTy, Name);
2542 }
2543 
2544 llvm::FunctionCallee
2545 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
2546   assert((IVSize == 32 || IVSize == 64) &&
2547          "IV size is not compatible with the omp runtime");
2548   StringRef Name =
2549       IVSize == 32
2550           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
2551           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
2552   llvm::Type *TypeParams[] = {
2553       getIdentTyPointerTy(), // loc
2554       CGM.Int32Ty,           // tid
2555   };
2556   auto *FnTy =
2557       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2558   return CGM.CreateRuntimeFunction(FnTy, Name);
2559 }
2560 
2561 llvm::FunctionCallee
2562 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
2563   assert((IVSize == 32 || IVSize == 64) &&
2564          "IV size is not compatible with the omp runtime");
2565   StringRef Name =
2566       IVSize == 32
2567           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
2568           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
2569   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2570   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2571   llvm::Type *TypeParams[] = {
2572     getIdentTyPointerTy(),                     // loc
2573     CGM.Int32Ty,                               // tid
2574     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2575     PtrTy,                                     // p_lower
2576     PtrTy,                                     // p_upper
2577     PtrTy                                      // p_stride
2578   };
2579   auto *FnTy =
2580       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2581   return CGM.CreateRuntimeFunction(FnTy, Name);
2582 }
2583 
2584 /// Obtain information that uniquely identifies a target entry. This
2585 /// consists of the file and device IDs as well as line number associated with
2586 /// the relevant entry source location.
2587 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
2588                                      unsigned &DeviceID, unsigned &FileID,
2589                                      unsigned &LineNum) {
2590   SourceManager &SM = C.getSourceManager();
2591 
2592   // The loc should be always valid and have a file ID (the user cannot use
2593   // #pragma directives in macros)
2594 
2595   assert(Loc.isValid() && "Source location is expected to be always valid.");
2596 
2597   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
2598   assert(PLoc.isValid() && "Source location is expected to be always valid.");
2599 
2600   llvm::sys::fs::UniqueID ID;
2601   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
2602     SM.getDiagnostics().Report(diag::err_cannot_open_file)
2603         << PLoc.getFilename() << EC.message();
2604 
2605   DeviceID = ID.getDevice();
2606   FileID = ID.getFile();
2607   LineNum = PLoc.getLine();
2608 }
2609 
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  // In OpenMP-simd-only mode no reference pointer is emitted.
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // A reference pointer is created for 'declare target link' variables, and
  // for 'declare target to' variables when unified shared memory is required.
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    // Build the pointer's name: <mangled-name>[_<file-id>]_decl_tgt_ref_ptr.
    // The hex file ID is appended only for internal symbols, to disambiguate
    // same-named internal variables coming from different files.
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    // Lazily create the global the first time it is requested; later calls
    // reuse the module-level value found by name.
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // On the host, initialize the pointer with the variable's own address;
      // on the device no initializer is set here (presumably filled in at
      // runtime via the registration below — confirm in
      // registerTargetGlobalVariable).
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
2648 
2649 llvm::Constant *
2650 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
2651   assert(!CGM.getLangOpts().OpenMPUseTLS ||
2652          !CGM.getContext().getTargetInfo().isTLSSupported());
2653   // Lookup the entry, lazily creating it if necessary.
2654   std::string Suffix = getName({"cache", ""});
2655   return getOrCreateInternalVariable(
2656       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
2657 }
2658 
2659 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
2660                                                 const VarDecl *VD,
2661                                                 Address VDAddr,
2662                                                 SourceLocation Loc) {
2663   if (CGM.getLangOpts().OpenMPUseTLS &&
2664       CGM.getContext().getTargetInfo().isTLSSupported())
2665     return VDAddr;
2666 
2667   llvm::Type *VarTy = VDAddr.getElementType();
2668   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2669                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2670                                                        CGM.Int8PtrTy),
2671                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
2672                          getOrCreateThreadPrivateCache(VD)};
2673   return Address(CGF.EmitRuntimeCall(
2674       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2675                  VDAddr.getAlignment());
2676 }
2677 
2678 void CGOpenMPRuntime::emitThreadPrivateVarInit(
2679     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
2680     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
2681   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
2682   // library.
2683   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
2684   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
2685                       OMPLoc);
2686   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
2687   // to register constructor/destructor for variable.
2688   llvm::Value *Args[] = {
2689       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
2690       Ctor, CopyCtor, Dtor};
2691   CGF.EmitRuntimeCall(
2692       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
2693 }
2694 
/// Emit ctor/dtor helper functions for a threadprivate variable and register
/// them with the OpenMP runtime.  If no enclosing CodeGenFunction is given,
/// the registration is wrapped in a fresh global-init function that is
/// returned so the caller can schedule it; otherwise registration code is
/// emitted inline into *CGF and nullptr is returned.  Also returns nullptr
/// when TLS lowering is used, when VD has no definition in this TU or was
/// already processed, or when neither a ctor nor a dtor is required.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // Native TLS lowering does not need any runtime registration.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  // Only the defining declaration is registered, and only once per mangled
  // name in this module.
  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // The ctor receives a single 'void *': the address of the thread's copy.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      // Load the incoming 'void *' argument and retype it to the variable's
      // memory type so the initializer can be emitted into it.
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The ctor returns the same pointer it was given.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // Like the ctor, the dtor takes the address of the thread's copy.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // The runtime expects typed null pointers for any missing ctor/dtor.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function: synthesize a dedicated init function that
      // performs the registration and hand it back to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Enclosing function provided: emit the registration inline.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
2814 
/// Emit ctor/dtor "target region" offload entries for a declare-target
/// variable so its initializer/destructor can run on the device.  On the
/// device the actual ctor/dtor functions are generated; on the host only
/// placeholder globals are created so both sides register matching entries.
/// All paths return CGM.getLangOpts().OpenMPIsDevice.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // Bail out for variables that are not mapped 'to', or that use unified
  // shared memory -- no per-device init/destruction is emitted for those.
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  // Only process the defining declaration, and only once per mangled name.
  VD = VD->getDefinition(CGM.getContext());
  if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();

  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the ctor alive even though nothing in the module calls it.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // On the host only a placeholder global is needed; its address serves
      // as the unique ID of the corresponding device entry.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the dtor alive even though nothing in the module calls it.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host-side placeholder, mirroring the ctor case above.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2924 
/// Return the address of the thread-local copy of an "artificial"
/// threadprivate variable -- one identified only by Name/VarType, with no
/// VarDecl behind it.  An internal global provides the master copy and a
/// second internal global serves as the runtime cache; the per-thread copy
/// is obtained via __kmpc_threadprivate_cached.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  // Globals are named "<Name>.artificial." and "<Name>.artificial..cache.".
  std::string Suffix = getName({"artificial", ""});
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  // Master copy shared by all threads; the runtime clones it per thread.
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  // __kmpc_threadprivate_cached(&loc, gtid, &var, size, &cache)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  // The runtime returns 'void *'; cast it back to the variable's type.
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getPointerAlign());
}
2948 
2949 void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
2950                                       const RegionCodeGenTy &ThenGen,
2951                                       const RegionCodeGenTy &ElseGen) {
2952   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2953 
2954   // If the condition constant folds and can be elided, try to avoid emitting
2955   // the condition and the dead arm of the if/else.
2956   bool CondConstant;
2957   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2958     if (CondConstant)
2959       ThenGen(CGF);
2960     else
2961       ElseGen(CGF);
2962     return;
2963   }
2964 
2965   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2966   // emit the conditional branch.
2967   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2968   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2969   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2970   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2971 
2972   // Emit the 'then' code.
2973   CGF.EmitBlock(ThenBlock);
2974   ThenGen(CGF);
2975   CGF.EmitBranch(ContBlock);
2976   // Emit the 'else' code if present.
2977   // There is no need to emit line number for unconditional branch.
2978   (void)ApplyDebugLocation::CreateEmpty(CGF);
2979   CGF.EmitBlock(ElseBlock);
2980   ElseGen(CGF);
2981   // There is no need to emit line number for unconditional branch.
2982   (void)ApplyDebugLocation::CreateEmpty(CGF);
2983   CGF.EmitBranch(ContBlock);
2984   // Emit the continuation block for code after the if.
2985   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2986 }
2987 
/// Emit the runtime call that launches a 'parallel' region.  Without an 'if'
/// clause (or when it is true) this is __kmpc_fork_call; when the 'if'
/// condition is false at runtime the region is executed serially by the
/// encountering thread between __kmpc_serialized_parallel /
/// __kmpc_end_serialized_parallel calls.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  // Parallel path: fork worker threads that each run OutlinedFn.
  auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // Serialized path: run OutlinedFn directly on the current thread.
  auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
                                                          PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);

    // OutlinedFn(&GTid, &zero, CapturedStruct);
    Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                                        /*Name*/ ".zero.addr");
    CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ZeroAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddr.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
        EndArgs);
  };
  // With an 'if' clause both arms are emitted behind a runtime check;
  // otherwise only the parallel path is generated.
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
3045 
3046 // If we're inside an (outlined) parallel region, use the region info's
3047 // thread-ID variable (it is passed in a first argument of the outlined function
3048 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
3049 // regular serial code region, get thread ID by calling kmp_int32
3050 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
3051 // return the address of that temp.
3052 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
3053                                              SourceLocation Loc) {
3054   if (auto *OMPRegionInfo =
3055           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3056     if (OMPRegionInfo->getThreadIDVariable())
3057       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
3058 
3059   llvm::Value *ThreadID = getThreadID(CGF, Loc);
3060   QualType Int32Ty =
3061       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
3062   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
3063   CGF.EmitStoreOfScalar(ThreadID,
3064                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
3065 
3066   return ThreadIDTemp;
3067 }
3068 
3069 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
3070     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
3071   SmallString<256> Buffer;
3072   llvm::raw_svector_ostream Out(Buffer);
3073   Out << Name;
3074   StringRef RuntimeName = Out.str();
3075   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
3076   if (Elem.second) {
3077     assert(Elem.second->getType()->getPointerElementType() == Ty &&
3078            "OMP internal variable has different type than requested");
3079     return &*Elem.second;
3080   }
3081 
3082   return Elem.second = new llvm::GlobalVariable(
3083              CGM.getModule(), Ty, /*IsConstant*/ false,
3084              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
3085              Elem.first(), /*InsertBefore=*/nullptr,
3086              llvm::GlobalValue::NotThreadLocal, AddressSpace);
3087 }
3088 
3089 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
3090   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
3091   std::string Name = getName({Prefix, "var"});
3092   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
3093 }
3094 
3095 namespace {
3096 /// Common pre(post)-action for different OpenMP constructs.
3097 class CommonActionTy final : public PrePostActionTy {
3098   llvm::FunctionCallee EnterCallee;
3099   ArrayRef<llvm::Value *> EnterArgs;
3100   llvm::FunctionCallee ExitCallee;
3101   ArrayRef<llvm::Value *> ExitArgs;
3102   bool Conditional;
3103   llvm::BasicBlock *ContBlock = nullptr;
3104 
3105 public:
3106   CommonActionTy(llvm::FunctionCallee EnterCallee,
3107                  ArrayRef<llvm::Value *> EnterArgs,
3108                  llvm::FunctionCallee ExitCallee,
3109                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
3110       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
3111         ExitArgs(ExitArgs), Conditional(Conditional) {}
3112   void Enter(CodeGenFunction &CGF) override {
3113     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
3114     if (Conditional) {
3115       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
3116       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
3117       ContBlock = CGF.createBasicBlock("omp_if.end");
3118       // Generate the branch (If-stmt)
3119       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
3120       CGF.EmitBlock(ThenBlock);
3121     }
3122   }
3123   void Done(CodeGenFunction &CGF) {
3124     // Emit the rest of blocks/branches
3125     CGF.EmitBranch(ContBlock);
3126     CGF.EmitBlock(ContBlock, true);
3127   }
3128   void Exit(CodeGenFunction &CGF) override {
3129     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
3130   }
3131 };
3132 } // anonymous namespace
3133 
3134 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
3135                                          StringRef CriticalName,
3136                                          const RegionCodeGenTy &CriticalOpGen,
3137                                          SourceLocation Loc, const Expr *Hint) {
3138   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
3139   // CriticalOpGen();
3140   // __kmpc_end_critical(ident_t *, gtid, Lock);
3141   // Prepare arguments and build a call to __kmpc_critical
3142   if (!CGF.HaveInsertPoint())
3143     return;
3144   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3145                          getCriticalRegionLock(CriticalName)};
3146   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
3147                                                 std::end(Args));
3148   if (Hint) {
3149     EnterArgs.push_back(CGF.Builder.CreateIntCast(
3150         CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
3151   }
3152   CommonActionTy Action(
3153       createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
3154                                  : OMPRTL__kmpc_critical),
3155       EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
3156   CriticalOpGen.setAction(Action);
3157   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
3158 }
3159 
3160 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
3161                                        const RegionCodeGenTy &MasterOpGen,
3162                                        SourceLocation Loc) {
3163   if (!CGF.HaveInsertPoint())
3164     return;
3165   // if(__kmpc_master(ident_t *, gtid)) {
3166   //   MasterOpGen();
3167   //   __kmpc_end_master(ident_t *, gtid);
3168   // }
3169   // Prepare arguments and build a call to __kmpc_master
3170   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3171   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
3172                         createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
3173                         /*Conditional=*/true);
3174   MasterOpGen.setAction(Action);
3175   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
3176   Action.Done(CGF);
3177 }
3178 
3179 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
3180                                         SourceLocation Loc) {
3181   if (!CGF.HaveInsertPoint())
3182     return;
3183   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
3184   llvm::Value *Args[] = {
3185       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3186       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
3187   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
3188   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3189     Region->emitUntiedSwitch(CGF);
3190 }
3191 
3192 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
3193                                           const RegionCodeGenTy &TaskgroupOpGen,
3194                                           SourceLocation Loc) {
3195   if (!CGF.HaveInsertPoint())
3196     return;
3197   // __kmpc_taskgroup(ident_t *, gtid);
3198   // TaskgroupOpGen();
3199   // __kmpc_end_taskgroup(ident_t *, gtid);
3200   // Prepare arguments and build a call to __kmpc_taskgroup
3201   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3202   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
3203                         createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
3204                         Args);
3205   TaskgroupOpGen.setAction(Action);
3206   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
3207 }
3208 
3209 /// Given an array of pointers to variables, project the address of a
3210 /// given variable.
3211 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
3212                                       unsigned Index, const VarDecl *Var) {
3213   // Pull out the pointer to the variable.
3214   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
3215   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
3216 
3217   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
3218   Addr = CGF.Builder.CreateElementBitCast(
3219       Addr, CGF.ConvertTypeForMem(Var->getType()));
3220   return Addr;
3221 }
3222 
/// Emit the helper function the runtime calls to broadcast 'copyprivate'
/// values: void copy_func(void *LHSArg, void *RHSArg), where each argument
/// is a pointer to an array of 'void *' slots (one per copyprivate
/// variable).  The body performs the per-variable assignment
/// AssignmentOps[I] from the RHS element to the LHS element.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    // Typed addresses of the I-th destination and source variables.
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    // Perform the copy using the AST-provided assignment expression.
    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
3276 
3277 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
3278                                        const RegionCodeGenTy &SingleOpGen,
3279                                        SourceLocation Loc,
3280                                        ArrayRef<const Expr *> CopyprivateVars,
3281                                        ArrayRef<const Expr *> SrcExprs,
3282                                        ArrayRef<const Expr *> DstExprs,
3283                                        ArrayRef<const Expr *> AssignmentOps) {
3284   if (!CGF.HaveInsertPoint())
3285     return;
3286   assert(CopyprivateVars.size() == SrcExprs.size() &&
3287          CopyprivateVars.size() == DstExprs.size() &&
3288          CopyprivateVars.size() == AssignmentOps.size());
3289   ASTContext &C = CGM.getContext();
3290   // int32 did_it = 0;
3291   // if(__kmpc_single(ident_t *, gtid)) {
3292   //   SingleOpGen();
3293   //   __kmpc_end_single(ident_t *, gtid);
3294   //   did_it = 1;
3295   // }
3296   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
3297   // <copy_func>, did_it);
3298 
3299   Address DidIt = Address::invalid();
3300   if (!CopyprivateVars.empty()) {
3301     // int32 did_it = 0;
3302     QualType KmpInt32Ty =
3303         C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3304     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
3305     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
3306   }
3307   // Prepare arguments and build a call to __kmpc_single
3308   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3309   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
3310                         createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
3311                         /*Conditional=*/true);
3312   SingleOpGen.setAction(Action);
3313   emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
3314   if (DidIt.isValid()) {
3315     // did_it = 1;
3316     CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
3317   }
3318   Action.Done(CGF);
3319   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
3320   // <copy_func>, did_it);
3321   if (DidIt.isValid()) {
3322     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
3323     QualType CopyprivateArrayTy =
3324         C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
3325                                /*IndexTypeQuals=*/0);
3326     // Create a list of all private variables for copyprivate.
3327     Address CopyprivateList =
3328         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
3329     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
3330       Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
3331       CGF.Builder.CreateStore(
3332           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3333               CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
3334           Elem);
3335     }
3336     // Build function that copies private values from single region to all other
3337     // threads in the corresponding parallel region.
3338     llvm::Value *CpyFn = emitCopyprivateCopyFunction(
3339         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
3340         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
3341     llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
3342     Address CL =
3343       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
3344                                                       CGF.VoidPtrTy);
3345     llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
3346     llvm::Value *Args[] = {
3347         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
3348         getThreadID(CGF, Loc),        // i32 <gtid>
3349         BufSize,                      // size_t <buf_size>
3350         CL.getPointer(),              // void *<copyprivate list>
3351         CpyFn,                        // void (*) (void *, void *) <copy_func>
3352         DidItVal                      // i32 did_it
3353     };
3354     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
3355   }
3356 }
3357 
3358 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
3359                                         const RegionCodeGenTy &OrderedOpGen,
3360                                         SourceLocation Loc, bool IsThreads) {
3361   if (!CGF.HaveInsertPoint())
3362     return;
3363   // __kmpc_ordered(ident_t *, gtid);
3364   // OrderedOpGen();
3365   // __kmpc_end_ordered(ident_t *, gtid);
3366   // Prepare arguments and build a call to __kmpc_ordered
3367   if (IsThreads) {
3368     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3369     CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
3370                           createRuntimeFunction(OMPRTL__kmpc_end_ordered),
3371                           Args);
3372     OrderedOpGen.setAction(Action);
3373     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3374     return;
3375   }
3376   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3377 }
3378 
3379 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
3380   unsigned Flags;
3381   if (Kind == OMPD_for)
3382     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
3383   else if (Kind == OMPD_sections)
3384     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
3385   else if (Kind == OMPD_single)
3386     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
3387   else if (Kind == OMPD_barrier)
3388     Flags = OMP_IDENT_BARRIER_EXPL;
3389   else
3390     Flags = OMP_IDENT_BARRIER_IMPL;
3391   return Flags;
3392 }
3393 
3394 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
3395     CodeGenFunction &CGF, const OMPLoopDirective &S,
3396     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
3397   // Check if the loop directive is actually a doacross loop directive. In this
3398   // case choose static, 1 schedule.
3399   if (llvm::any_of(
3400           S.getClausesOfKind<OMPOrderedClause>(),
3401           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
3402     ScheduleKind = OMPC_SCHEDULE_static;
3403     // Chunk size is 1 in this case.
3404     llvm::APInt ChunkSize(32, 1);
3405     ChunkExpr = IntegerLiteral::Create(
3406         CGF.getContext(), ChunkSize,
3407         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
3408         SourceLocation());
3409   }
3410 }
3411 
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  if (!CGF.HaveInsertPoint())
    return;
  // Emit either __kmpc_cancel_barrier(loc, thread_id) (inside a cancellable
  // region) or __kmpc_barrier(loc, thread_id), tagging the location with the
  // flags of the construct implying the barrier.
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // Use the cancellation-aware barrier only when the enclosing region may
    // actually be cancelled and the caller did not force a plain barrier.
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // A non-zero result means the region was cancelled: branch to the
        // construct's cancellation destination through any active cleanups.
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
}
3449 
3450 /// Map the OpenMP loop schedule to the runtime enumeration.
3451 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
3452                                           bool Chunked, bool Ordered) {
3453   switch (ScheduleKind) {
3454   case OMPC_SCHEDULE_static:
3455     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
3456                    : (Ordered ? OMP_ord_static : OMP_sch_static);
3457   case OMPC_SCHEDULE_dynamic:
3458     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
3459   case OMPC_SCHEDULE_guided:
3460     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
3461   case OMPC_SCHEDULE_runtime:
3462     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
3463   case OMPC_SCHEDULE_auto:
3464     return Ordered ? OMP_ord_auto : OMP_sch_auto;
3465   case OMPC_SCHEDULE_unknown:
3466     assert(!Chunked && "chunk was specified but schedule kind not known");
3467     return Ordered ? OMP_ord_static : OMP_sch_static;
3468   }
3469   llvm_unreachable("Unexpected runtime schedule");
3470 }
3471 
3472 /// Map the OpenMP distribute schedule to the runtime enumeration.
3473 static OpenMPSchedType
3474 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
3475   // only static is allowed for dist_schedule
3476   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
3477 }
3478 
3479 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
3480                                          bool Chunked) const {
3481   OpenMPSchedType Schedule =
3482       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3483   return Schedule == OMP_sch_static;
3484 }
3485 
3486 bool CGOpenMPRuntime::isStaticNonchunked(
3487     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3488   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3489   return Schedule == OMP_dist_sch_static;
3490 }
3491 
3492 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
3493                                       bool Chunked) const {
3494   OpenMPSchedType Schedule =
3495       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3496   return Schedule == OMP_sch_static_chunked;
3497 }
3498 
3499 bool CGOpenMPRuntime::isStaticChunked(
3500     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3501   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3502   return Schedule == OMP_dist_sch_static_chunked;
3503 }
3504 
3505 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
3506   OpenMPSchedType Schedule =
3507       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
3508   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
3509   return Schedule != OMP_sch_static;
3510 }
3511 
3512 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
3513                                   OpenMPScheduleClauseModifier M1,
3514                                   OpenMPScheduleClauseModifier M2) {
3515   int Modifier = 0;
3516   switch (M1) {
3517   case OMPC_SCHEDULE_MODIFIER_monotonic:
3518     Modifier = OMP_sch_modifier_monotonic;
3519     break;
3520   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3521     Modifier = OMP_sch_modifier_nonmonotonic;
3522     break;
3523   case OMPC_SCHEDULE_MODIFIER_simd:
3524     if (Schedule == OMP_sch_static_chunked)
3525       Schedule = OMP_sch_static_balanced_chunked;
3526     break;
3527   case OMPC_SCHEDULE_MODIFIER_last:
3528   case OMPC_SCHEDULE_MODIFIER_unknown:
3529     break;
3530   }
3531   switch (M2) {
3532   case OMPC_SCHEDULE_MODIFIER_monotonic:
3533     Modifier = OMP_sch_modifier_monotonic;
3534     break;
3535   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3536     Modifier = OMP_sch_modifier_nonmonotonic;
3537     break;
3538   case OMPC_SCHEDULE_MODIFIER_simd:
3539     if (Schedule == OMP_sch_static_chunked)
3540       Schedule = OMP_sch_static_balanced_chunked;
3541     break;
3542   case OMPC_SCHEDULE_MODIFIER_last:
3543   case OMPC_SCHEDULE_MODIFIER_unknown:
3544     break;
3545   }
3546   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
3547   // If the static schedule kind is specified or if the ordered clause is
3548   // specified, and if the nonmonotonic modifier is not specified, the effect is
3549   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
3550   // modifier is specified, the effect is as if the nonmonotonic modifier is
3551   // specified.
3552   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
3553     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
3554           Schedule == OMP_sch_static_balanced_chunked ||
3555           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static))
3556       Modifier = OMP_sch_modifier_nonmonotonic;
3557   }
3558   return Schedule | Modifier;
3559 }
3560 
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  // Map the clause schedule (plus chunkedness and orderedness) to the
  // runtime enumeration.
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  // Non-ordered static schedules must go through emitForStaticInit instead of
  // the dispatch path.
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  // Argument order must match the runtime entry point's signature above.
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}
3593 
/// Emit a call to the selected __kmpc_for_static_init_* runtime entry point
/// with the schedule/modifier word and the addresses of the loop-control
/// variables from \p Values.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  // Only static-style schedules may be initialized here; ordered loops go
  // through the dispatch-init path instead.
  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    // A missing chunk expression is only legal for non-chunked schedules.
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  // Argument order must match the runtime entry point's signature above.
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}
3642 
3643 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
3644                                         SourceLocation Loc,
3645                                         OpenMPDirectiveKind DKind,
3646                                         const OpenMPScheduleTy &ScheduleKind,
3647                                         const StaticRTInput &Values) {
3648   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
3649       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
3650   assert(isOpenMPWorksharingDirective(DKind) &&
3651          "Expected loop-based or sections-based directive.");
3652   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
3653                                              isOpenMPLoopDirective(DKind)
3654                                                  ? OMP_IDENT_WORK_LOOP
3655                                                  : OMP_IDENT_WORK_SECTIONS);
3656   llvm::Value *ThreadId = getThreadID(CGF, Loc);
3657   llvm::FunctionCallee StaticInitFunction =
3658       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3659   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3660                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
3661 }
3662 
3663 void CGOpenMPRuntime::emitDistributeStaticInit(
3664     CodeGenFunction &CGF, SourceLocation Loc,
3665     OpenMPDistScheduleClauseKind SchedKind,
3666     const CGOpenMPRuntime::StaticRTInput &Values) {
3667   OpenMPSchedType ScheduleNum =
3668       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
3669   llvm::Value *UpdatedLocation =
3670       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
3671   llvm::Value *ThreadId = getThreadID(CGF, Loc);
3672   llvm::FunctionCallee StaticInitFunction =
3673       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3674   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3675                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
3676                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
3677 }
3678 
3679 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
3680                                           SourceLocation Loc,
3681                                           OpenMPDirectiveKind DKind) {
3682   if (!CGF.HaveInsertPoint())
3683     return;
3684   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
3685   llvm::Value *Args[] = {
3686       emitUpdateLocation(CGF, Loc,
3687                          isOpenMPDistributeDirective(DKind)
3688                              ? OMP_IDENT_WORK_DISTRIBUTE
3689                              : isOpenMPLoopDirective(DKind)
3690                                    ? OMP_IDENT_WORK_LOOP
3691                                    : OMP_IDENT_WORK_SECTIONS),
3692       getThreadID(CGF, Loc)};
3693   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
3694                       Args);
3695 }
3696 
3697 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
3698                                                  SourceLocation Loc,
3699                                                  unsigned IVSize,
3700                                                  bool IVSigned) {
3701   if (!CGF.HaveInsertPoint())
3702     return;
3703   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
3704   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3705   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
3706 }
3707 
3708 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
3709                                           SourceLocation Loc, unsigned IVSize,
3710                                           bool IVSigned, Address IL,
3711                                           Address LB, Address UB,
3712                                           Address ST) {
3713   // Call __kmpc_dispatch_next(
3714   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
3715   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
3716   //          kmp_int[32|64] *p_stride);
3717   llvm::Value *Args[] = {
3718       emitUpdateLocation(CGF, Loc),
3719       getThreadID(CGF, Loc),
3720       IL.getPointer(), // &isLastIter
3721       LB.getPointer(), // &Lower
3722       UB.getPointer(), // &Upper
3723       ST.getPointer()  // &Stride
3724   };
3725   llvm::Value *Call =
3726       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
3727   return CGF.EmitScalarConversion(
3728       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
3729       CGF.getContext().BoolTy, Loc);
3730 }
3731 
3732 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
3733                                            llvm::Value *NumThreads,
3734                                            SourceLocation Loc) {
3735   if (!CGF.HaveInsertPoint())
3736     return;
3737   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
3738   llvm::Value *Args[] = {
3739       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3740       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
3741   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
3742                       Args);
3743 }
3744 
3745 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
3746                                          OpenMPProcBindClauseKind ProcBind,
3747                                          SourceLocation Loc) {
3748   if (!CGF.HaveInsertPoint())
3749     return;
3750   // Constants for proc bind value accepted by the runtime.
3751   enum ProcBindTy {
3752     ProcBindFalse = 0,
3753     ProcBindTrue,
3754     ProcBindMaster,
3755     ProcBindClose,
3756     ProcBindSpread,
3757     ProcBindIntel,
3758     ProcBindDefault
3759   } RuntimeProcBind;
3760   switch (ProcBind) {
3761   case OMPC_PROC_BIND_master:
3762     RuntimeProcBind = ProcBindMaster;
3763     break;
3764   case OMPC_PROC_BIND_close:
3765     RuntimeProcBind = ProcBindClose;
3766     break;
3767   case OMPC_PROC_BIND_spread:
3768     RuntimeProcBind = ProcBindSpread;
3769     break;
3770   case OMPC_PROC_BIND_unknown:
3771     llvm_unreachable("Unsupported proc_bind value.");
3772   }
3773   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
3774   llvm::Value *Args[] = {
3775       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3776       llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
3777   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
3778 }
3779 
3780 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
3781                                 SourceLocation Loc) {
3782   if (!CGF.HaveInsertPoint())
3783     return;
3784   // Build call void __kmpc_flush(ident_t *loc)
3785   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
3786                       emitUpdateLocation(CGF, Loc));
3787 }
3788 
namespace {
/// Indexes of fields for type kmp_task_t.
/// NOTE(review): the order of these enumerators is presumably tied to the
/// field order of the runtime's kmp_task_t record built elsewhere in this
/// file — confirm before reordering.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
3814 
3815 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3816   return OffloadEntriesTargetRegion.empty() &&
3817          OffloadEntriesDeviceGlobalVar.empty();
3818 }
3819 
3820 /// Initialize target region entry.
3821 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3822     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3823                                     StringRef ParentName, unsigned LineNum,
3824                                     unsigned Order) {
3825   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3826                                              "only required for the device "
3827                                              "code generation.");
3828   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3829       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3830                                    OMPTargetRegionEntryTargetRegion);
3831   ++OffloadingEntriesNum;
3832 }
3833 
3834 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3835     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3836                                   StringRef ParentName, unsigned LineNum,
3837                                   llvm::Constant *Addr, llvm::Constant *ID,
3838                                   OMPTargetRegionEntryKind Flags) {
3839   // If we are emitting code for a target, the entry is already initialized,
3840   // only has to be registered.
3841   if (CGM.getLangOpts().OpenMPIsDevice) {
3842     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
3843       unsigned DiagID = CGM.getDiags().getCustomDiagID(
3844           DiagnosticsEngine::Error,
3845           "Unable to find target region on line '%0' in the device code.");
3846       CGM.getDiags().Report(DiagID) << LineNum;
3847       return;
3848     }
3849     auto &Entry =
3850         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3851     assert(Entry.isValid() && "Entry not initialized!");
3852     Entry.setAddress(Addr);
3853     Entry.setID(ID);
3854     Entry.setFlags(Flags);
3855   } else {
3856     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3857     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3858     ++OffloadingEntriesNum;
3859   }
3860 }
3861 
3862 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3863     unsigned DeviceID, unsigned FileID, StringRef ParentName,
3864     unsigned LineNum) const {
3865   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3866   if (PerDevice == OffloadEntriesTargetRegion.end())
3867     return false;
3868   auto PerFile = PerDevice->second.find(FileID);
3869   if (PerFile == PerDevice->second.end())
3870     return false;
3871   auto PerParentName = PerFile->second.find(ParentName);
3872   if (PerParentName == PerFile->second.end())
3873     return false;
3874   auto PerLine = PerParentName->second.find(LineNum);
3875   if (PerLine == PerParentName->second.end())
3876     return false;
3877   // Fail if this entry is already registered.
3878   if (PerLine->second.getAddress() || PerLine->second.getID())
3879     return false;
3880   return true;
3881 }
3882 
3883 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3884     const OffloadTargetRegionEntryInfoActTy &Action) {
3885   // Scan all target region entries and perform the provided action.
3886   for (const auto &D : OffloadEntriesTargetRegion)
3887     for (const auto &F : D.second)
3888       for (const auto &P : F.second)
3889         for (const auto &L : P.second)
3890           Action(D.first, F.first, P.first(), L.first, L.second);
3891 }
3892 
3893 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3894     initializeDeviceGlobalVarEntryInfo(StringRef Name,
3895                                        OMPTargetGlobalVarEntryKind Flags,
3896                                        unsigned Order) {
3897   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3898                                              "only required for the device "
3899                                              "code generation.");
3900   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3901   ++OffloadingEntriesNum;
3902 }
3903 
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  // Record (or complete) the offload entry for a device global variable. On
  // the device the entry is expected to have been pre-initialized; on the
  // host a fresh entry is created the first time the variable is seen.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // NOTE(review): operator[] default-constructs a fresh (invalid) entry if
    // VarName was never initialized; only the assert below guards against
    // that — confirm it cannot happen in release builds.
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    assert(Entry.isValid() && Entry.getFlags() == Flags &&
           "Entry not initialized!");
    assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
           "Resetting with the new address.");
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      // Address already known: only fill in the size/linkage if they were
      // not recorded before.
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      // Seen before on the host: keep the existing entry, updating
      // size/linkage if they were previously unknown.
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
             "Resetting with the new address.");
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    // First sighting on the host: create a complete entry.
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}
3943 
3944 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3945     actOnDeviceGlobalVarEntriesInfo(
3946         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3947   // Scan all target region entries and perform the provided action.
3948   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3949     Action(E.getKey(), E.getValue());
3950 }
3951 
/// Creates the host-side registration machinery for the offloading binary
/// descriptor (__tgt_bin_desc): the descriptor global itself, a registration
/// function calling __tgt_register_lib, and an unregistration function
/// calling __tgt_unregister_lib. Returns the registration function (meant to
/// run as a global initializer), or null when there is nothing to register.
llvm::Function *
CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
    return nullptr;

  llvm::Module &M = CGM.getModule();
  ASTContext &C = CGM.getContext();

  // Get the list of devices we care about.
  const std::vector<llvm::Triple> &Devices = CGM.getLangOpts().OMPTargetTriples;

  // We should be creating an offloading descriptor only if there are devices
  // specified.
  assert(!Devices.empty() && "No OpenMP offloading devices??");

  // Create the external variables that will point to the begin and end of the
  // host entries section. These will be defined by the linker.
  llvm::Type *OffloadEntryTy =
      CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
  std::string EntriesBeginName = getName({"omp_offloading", "entries_begin"});
  auto *HostEntriesBegin = new llvm::GlobalVariable(
      M, OffloadEntryTy, /*isConstant=*/true,
      llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
      EntriesBeginName);
  std::string EntriesEndName = getName({"omp_offloading", "entries_end"});
  auto *HostEntriesEnd =
      new llvm::GlobalVariable(M, OffloadEntryTy, /*isConstant=*/true,
                               llvm::GlobalValue::ExternalLinkage,
                               /*Initializer=*/nullptr, EntriesEndName);

  // Create all device images. Each image references its per-triple code
  // range plus the host entries table, since the target runtime may find
  // that information useful.
  auto *DeviceImageTy = cast<llvm::StructType>(
      CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));
  ConstantInitBuilder DeviceImagesBuilder(CGM);
  ConstantArrayBuilder DeviceImagesEntries =
      DeviceImagesBuilder.beginArray(DeviceImageTy);

  for (const llvm::Triple &Device : Devices) {
    StringRef T = Device.getTriple();
    // Begin/end markers of the device image for this triple. extern_weak
    // linkage lets them resolve to null instead of failing the link if no
    // such image is present.
    std::string BeginName = getName({"omp_offloading", "img_start", ""});
    auto *ImgBegin = new llvm::GlobalVariable(
        M, CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::ExternalWeakLinkage,
        /*Initializer=*/nullptr, Twine(BeginName).concat(T));
    std::string EndName = getName({"omp_offloading", "img_end", ""});
    auto *ImgEnd = new llvm::GlobalVariable(
        M, CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::ExternalWeakLinkage,
        /*Initializer=*/nullptr, Twine(EndName).concat(T));

    llvm::Constant *Data[] = {ImgBegin, ImgEnd, HostEntriesBegin,
                              HostEntriesEnd};
    createConstantGlobalStructAndAddToParent(CGM, getTgtDeviceImageQTy(), Data,
                                             DeviceImagesEntries);
  }

  // Create the device images global array.
  std::string ImagesName = getName({"omp_offloading", "device_images"});
  llvm::GlobalVariable *DeviceImages =
      DeviceImagesEntries.finishAndCreateGlobal(ImagesName,
                                                CGM.getPointerAlign(),
                                                /*isConstant=*/true);
  DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  // This is a zero-index array used to build the GEP constant expression
  // that points at the first device image below.
  llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
                             llvm::Constant::getNullValue(CGM.Int32Ty)};

  // Create the target region descriptor:
  // {NumDevices, &DeviceImages[0], EntriesBegin, EntriesEnd}.
  llvm::Constant *Data[] = {
      llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()),
      llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(),
                                           DeviceImages, Index),
      HostEntriesBegin, HostEntriesEnd};
  std::string Descriptor = getName({"omp_offloading", "descriptor"});
  llvm::GlobalVariable *Desc = createGlobalStruct(
      CGM, getTgtBinaryDescriptorQTy(), /*IsConstant=*/true, Data, Descriptor);

  // Emit code to register or unregister the descriptor at execution
  // startup or closing, respectively.

  llvm::Function *UnRegFn;
  {
    // The unregistration function takes a single dummy void* argument so its
    // signature fits the dtor form expected by registerGlobalDtor below.
    FunctionArgList Args;
    ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other);
    Args.push_back(&DummyPtr);

    CodeGenFunction CGF(CGM);
    // Disable debug info for global (de-)initializers because they are not
    // part of any particular construct.
    CGF.disableDebugInfo();
    const auto &FI =
        CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string UnregName = getName({"omp_offloading", "descriptor_unreg"});
    UnRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, UnregName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, UnRegFn, FI, Args);
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
                        Desc);
    CGF.FinishFunction();
  }
  llvm::Function *RegFn;
  {
    CodeGenFunction CGF(CGM);
    // Disable debug info for global (de-)initializers because they are not
    // part of any particular construct.
    CGF.disableDebugInfo();
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);

    // Encode offload target triples into the registration function name. It
    // will serve as a comdat key for the registration/unregistration code for
    // this particular combination of offloading targets. The triples are
    // sorted so the name is stable regardless of command-line order.
    SmallVector<StringRef, 4U> RegFnNameParts(Devices.size() + 2U);
    RegFnNameParts[0] = "omp_offloading";
    RegFnNameParts[1] = "descriptor_reg";
    llvm::transform(Devices, std::next(RegFnNameParts.begin(), 2),
                    [](const llvm::Triple &T) -> const std::string& {
                      return T.getTriple();
                    });
    llvm::sort(std::next(RegFnNameParts.begin(), 2), RegFnNameParts.end());
    std::string Descriptor = getName(RegFnNameParts);
    RegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, Descriptor, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList());
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc);
    // Create a variable to drive the registration and unregistration of the
    // descriptor, so we can reuse the logic that emits Ctors and Dtors.
    ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(),
                                  SourceLocation(), nullptr, C.CharTy,
                                  ImplicitParamDecl::Other);
    CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
    CGF.FinishFunction();
  }
  if (CGM.supportsCOMDAT()) {
    // It is sufficient to call registration function only once, so create a
    // COMDAT group for registration/unregistration functions and associated
    // data. That would reduce startup time and code size. Registration
    // function serves as a COMDAT group key.
    llvm::Comdat *ComdatKey = M.getOrInsertComdat(RegFn->getName());
    RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
    RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility);
    RegFn->setComdat(ComdatKey);
    UnRegFn->setComdat(ComdatKey);
    DeviceImages->setComdat(ComdatKey);
    Desc->setComdat(ComdatKey);
  }
  return RegFn;
}
4102 
4103 void CGOpenMPRuntime::createOffloadEntry(
4104     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
4105     llvm::GlobalValue::LinkageTypes Linkage) {
4106   StringRef Name = Addr->getName();
4107   llvm::Module &M = CGM.getModule();
4108   llvm::LLVMContext &C = M.getContext();
4109 
4110   // Create constant string with the name.
4111   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
4112 
4113   std::string StringName = getName({"omp_offloading", "entry_name"});
4114   auto *Str = new llvm::GlobalVariable(
4115       M, StrPtrInit->getType(), /*isConstant=*/true,
4116       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
4117   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
4118 
4119   llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
4120                             llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
4121                             llvm::ConstantInt::get(CGM.SizeTy, Size),
4122                             llvm::ConstantInt::get(CGM.Int32Ty, Flags),
4123                             llvm::ConstantInt::get(CGM.Int32Ty, 0)};
4124   std::string EntryName = getName({"omp_offloading", "entry", ""});
4125   llvm::GlobalVariable *Entry = createGlobalStruct(
4126       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
4127       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
4128 
4129   // The entry has to be created in the section the linker expects it to be.
4130   std::string Section = getName({"omp_offloading", "entries"});
4131   Entry->setSection(Section);
4132 }
4133 
/// Emits the host offload entries table and the !omp_offload.info metadata
/// that the device-side compilation later reads back via
/// loadOffloadInfoMetadata().
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for functions that contain target
  // regions.

  // If we do not have entries, we don't need to do anything.
  if (OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries indexed by their creation order; filled in by the emitter
  // lambdas below.
  SmallVector<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  // Parent function name for each target-region entry, indexed by order.
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry.
  auto &&TargetRegionMetadataEmitter =
      [&C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = &E;
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry.
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = &E;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Emit the actual offload entries in creation order, diagnosing entries
  // that were registered but never received an address/ID.
  for (const auto *E : OrderedEntries) {
    assert(E && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                E)) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(DiagID);
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE =
                   dyn_cast<OffloadEntriesInfoManagerTy::
                                OffloadEntryInfoDeviceGlobalVar>(E)) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        // 'to' entries are skipped on the device when unified shared memory
        // is required.
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        // 'link' entries carry an address only on the host side.
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
4291 
/// Loads all the offload entries information from the host IR
/// metadata. Only meaningful for device compilations that were given a host
/// IR file (-fopenmp-host-ir-file-path); a no-op otherwise.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host IR into a throwaway context; only the named metadata is
  // read from the resulting module.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Helpers to decode the integer/string operands of one metadata node.
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 is the entry kind; the remaining operand layout mirrors the
    // emitter lambdas in createOffloadEntriesAndInfoMetadata().
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
4360 
4361 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
4362   if (!KmpRoutineEntryPtrTy) {
4363     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
4364     ASTContext &C = CGM.getContext();
4365     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
4366     FunctionProtoType::ExtProtoInfo EPI;
4367     KmpRoutineEntryPtrQTy = C.getPointerType(
4368         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
4369     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
4370   }
4371 }
4372 
4373 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
4374   // Make sure the type of the entry is already created. This is the type we
4375   // have to create:
4376   // struct __tgt_offload_entry{
4377   //   void      *addr;       // Pointer to the offload entry info.
4378   //                          // (function or global)
4379   //   char      *name;       // Name of the function or global.
4380   //   size_t     size;       // Size of the entry info (0 if it a function).
4381   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
4382   //   int32_t    reserved;   // Reserved, to use by the runtime library.
4383   // };
4384   if (TgtOffloadEntryQTy.isNull()) {
4385     ASTContext &C = CGM.getContext();
4386     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
4387     RD->startDefinition();
4388     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4389     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
4390     addFieldToRecordDecl(C, RD, C.getSizeType());
4391     addFieldToRecordDecl(
4392         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4393     addFieldToRecordDecl(
4394         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4395     RD->completeDefinition();
4396     RD->addAttr(PackedAttr::CreateImplicit(C));
4397     TgtOffloadEntryQTy = C.getRecordType(RD);
4398   }
4399   return TgtOffloadEntryQTy;
4400 }
4401 
4402 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
4403   // These are the types we need to build:
4404   // struct __tgt_device_image{
4405   // void   *ImageStart;       // Pointer to the target code start.
4406   // void   *ImageEnd;         // Pointer to the target code end.
4407   // // We also add the host entries to the device image, as it may be useful
4408   // // for the target runtime to have access to that information.
4409   // __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all
4410   //                                       // the entries.
4411   // __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
4412   //                                       // entries (non inclusive).
4413   // };
4414   if (TgtDeviceImageQTy.isNull()) {
4415     ASTContext &C = CGM.getContext();
4416     RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image");
4417     RD->startDefinition();
4418     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4419     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4420     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4421     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4422     RD->completeDefinition();
4423     TgtDeviceImageQTy = C.getRecordType(RD);
4424   }
4425   return TgtDeviceImageQTy;
4426 }
4427 
4428 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
4429   // struct __tgt_bin_desc{
4430   //   int32_t              NumDevices;      // Number of devices supported.
4431   //   __tgt_device_image   *DeviceImages;   // Arrays of device images
4432   //                                         // (one per device).
4433   //   __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all the
4434   //                                         // entries.
4435   //   __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
4436   //                                         // entries (non inclusive).
4437   // };
4438   if (TgtBinaryDescriptorQTy.isNull()) {
4439     ASTContext &C = CGM.getContext();
4440     RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc");
4441     RD->startDefinition();
4442     addFieldToRecordDecl(
4443         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4444     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
4445     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4446     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4447     RD->completeDefinition();
4448     TgtBinaryDescriptorQTy = C.getRecordType(RD);
4449   }
4450   return TgtBinaryDescriptorQTy;
4451 }
4452 
namespace {
/// Bundles the declarations involved in privatizing one variable for a task:
/// the variable as written in the source, the private copy emitted for the
/// task, and the declaration used to initialize elements of the private copy
/// (presumably null when no initialization is needed — confirm with callers).
struct PrivateHelpersTy {
  PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
                   const VarDecl *PrivateElemInit)
      : Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  // Variable as written in the source.
  const VarDecl *Original;
  // Private copy emitted for the task.
  const VarDecl *PrivateCopy;
  // Declaration used to initialize elements of the private copy.
  const VarDecl *PrivateElemInit;
};
/// Pairs the required alignment with the privatization info for one variable.
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace
4465 
4466 static RecordDecl *
4467 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
4468   if (!Privates.empty()) {
4469     ASTContext &C = CGM.getContext();
4470     // Build struct .kmp_privates_t. {
4471     //         /*  private vars  */
4472     //       };
4473     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
4474     RD->startDefinition();
4475     for (const auto &Pair : Privates) {
4476       const VarDecl *VD = Pair.second.Original;
4477       QualType Type = VD->getType().getNonReferenceType();
4478       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
4479       if (VD->hasAttrs()) {
4480         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
4481              E(VD->getAttrs().end());
4482              I != E; ++I)
4483           FD->addAttr(*I);
4484       }
4485     }
4486     RD->completeDefinition();
4487     return RD;
4488   }
4489   return nullptr;
4490 }
4491 
4492 static RecordDecl *
4493 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
4494                          QualType KmpInt32Ty,
4495                          QualType KmpRoutineEntryPointerQTy) {
4496   ASTContext &C = CGM.getContext();
4497   // Build struct kmp_task_t {
4498   //         void *              shareds;
4499   //         kmp_routine_entry_t routine;
4500   //         kmp_int32           part_id;
4501   //         kmp_cmplrdata_t data1;
4502   //         kmp_cmplrdata_t data2;
4503   // For taskloops additional fields:
4504   //         kmp_uint64          lb;
4505   //         kmp_uint64          ub;
4506   //         kmp_int64           st;
4507   //         kmp_int32           liter;
4508   //         void *              reductions;
4509   //       };
4510   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
4511   UD->startDefinition();
4512   addFieldToRecordDecl(C, UD, KmpInt32Ty);
4513   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
4514   UD->completeDefinition();
4515   QualType KmpCmplrdataTy = C.getRecordType(UD);
4516   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
4517   RD->startDefinition();
4518   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4519   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
4520   addFieldToRecordDecl(C, RD, KmpInt32Ty);
4521   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4522   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4523   if (isOpenMPTaskLoopDirective(Kind)) {
4524     QualType KmpUInt64Ty =
4525         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
4526     QualType KmpInt64Ty =
4527         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
4528     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4529     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4530     addFieldToRecordDecl(C, RD, KmpInt64Ty);
4531     addFieldToRecordDecl(C, RD, KmpInt32Ty);
4532     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4533   }
4534   RD->completeDefinition();
4535   return RD;
4536 }
4537 
4538 static RecordDecl *
4539 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
4540                                      ArrayRef<PrivateDataTy> Privates) {
4541   ASTContext &C = CGM.getContext();
4542   // Build struct kmp_task_t_with_privates {
4543   //         kmp_task_t task_data;
4544   //         .kmp_privates_t. privates;
4545   //       };
4546   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
4547   RD->startDefinition();
4548   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
4549   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
4550     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
4551   RD->completeDefinition();
4552   return RD;
4553 }
4554 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument. This is the entry the runtime invokes; it unpacks the task
/// record and forwards the pieces to the real outlined \p TaskFunction.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Declare the two parameters of the proxy: the global thread id and a
  // restrict-qualified pointer to the task record.
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // Dereference the task record pointer; Base below is the embedded
  // kmp_task_t (first field of kmp_task_t_with_privates).
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address, not by value.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer();

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates field is the (optional) second field of the wrapper record;
  // pass a null void* when the task has no privates.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloops additionally receive lb, ub, st, liter and the reductions
    // pointer, loaded from the task record.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The proxy always returns 0 to the runtime.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
4669 
4670 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
4671                                             SourceLocation Loc,
4672                                             QualType KmpInt32Ty,
4673                                             QualType KmpTaskTWithPrivatesPtrQTy,
4674                                             QualType KmpTaskTWithPrivatesQTy) {
4675   ASTContext &C = CGM.getContext();
4676   FunctionArgList Args;
4677   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
4678                             ImplicitParamDecl::Other);
4679   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4680                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
4681                                 ImplicitParamDecl::Other);
4682   Args.push_back(&GtidArg);
4683   Args.push_back(&TaskTypeArg);
4684   const auto &DestructorFnInfo =
4685       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
4686   llvm::FunctionType *DestructorFnTy =
4687       CGM.getTypes().GetFunctionType(DestructorFnInfo);
4688   std::string Name =
4689       CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
4690   auto *DestructorFn =
4691       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
4692                              Name, &CGM.getModule());
4693   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
4694                                     DestructorFnInfo);
4695   DestructorFn->setDoesNotRecurse();
4696   CodeGenFunction CGF(CGM);
4697   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
4698                     Args, Loc, Loc);
4699 
4700   LValue Base = CGF.EmitLoadOfPointerLValue(
4701       CGF.GetAddrOfLocalVar(&TaskTypeArg),
4702       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4703   const auto *KmpTaskTWithPrivatesQTyRD =
4704       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
4705   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4706   Base = CGF.EmitLValueForField(Base, *FI);
4707   for (const auto *Field :
4708        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
4709     if (QualType::DestructionKind DtorKind =
4710             Field->getType().isDestructedType()) {
4711       LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
4712       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
4713     }
4714   }
4715   CGF.FinishFunction();
4716   return DestructorFn;
4717 }
4718 
4719 /// Emit a privates mapping function for correct handling of private and
4720 /// firstprivate variables.
4721 /// \code
4722 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
4723 /// **noalias priv1,...,  <tyn> **noalias privn) {
4724 ///   *priv1 = &.privates.priv1;
4725 ///   ...;
4726 ///   *privn = &.privates.privn;
4727 /// }
4728 /// \endcode
4729 static llvm::Value *
4730 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
4731                                ArrayRef<const Expr *> PrivateVars,
4732                                ArrayRef<const Expr *> FirstprivateVars,
4733                                ArrayRef<const Expr *> LastprivateVars,
4734                                QualType PrivatesQTy,
4735                                ArrayRef<PrivateDataTy> Privates) {
4736   ASTContext &C = CGM.getContext();
4737   FunctionArgList Args;
4738   ImplicitParamDecl TaskPrivatesArg(
4739       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4740       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
4741       ImplicitParamDecl::Other);
4742   Args.push_back(&TaskPrivatesArg);
4743   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
4744   unsigned Counter = 1;
4745   for (const Expr *E : PrivateVars) {
4746     Args.push_back(ImplicitParamDecl::Create(
4747         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4748         C.getPointerType(C.getPointerType(E->getType()))
4749             .withConst()
4750             .withRestrict(),
4751         ImplicitParamDecl::Other));
4752     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4753     PrivateVarsPos[VD] = Counter;
4754     ++Counter;
4755   }
4756   for (const Expr *E : FirstprivateVars) {
4757     Args.push_back(ImplicitParamDecl::Create(
4758         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4759         C.getPointerType(C.getPointerType(E->getType()))
4760             .withConst()
4761             .withRestrict(),
4762         ImplicitParamDecl::Other));
4763     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4764     PrivateVarsPos[VD] = Counter;
4765     ++Counter;
4766   }
4767   for (const Expr *E : LastprivateVars) {
4768     Args.push_back(ImplicitParamDecl::Create(
4769         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4770         C.getPointerType(C.getPointerType(E->getType()))
4771             .withConst()
4772             .withRestrict(),
4773         ImplicitParamDecl::Other));
4774     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4775     PrivateVarsPos[VD] = Counter;
4776     ++Counter;
4777   }
4778   const auto &TaskPrivatesMapFnInfo =
4779       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4780   llvm::FunctionType *TaskPrivatesMapTy =
4781       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
4782   std::string Name =
4783       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
4784   auto *TaskPrivatesMap = llvm::Function::Create(
4785       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
4786       &CGM.getModule());
4787   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
4788                                     TaskPrivatesMapFnInfo);
4789   if (CGM.getLangOpts().Optimize) {
4790     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
4791     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
4792     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
4793   }
4794   CodeGenFunction CGF(CGM);
4795   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
4796                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
4797 
4798   // *privi = &.privates.privi;
4799   LValue Base = CGF.EmitLoadOfPointerLValue(
4800       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
4801       TaskPrivatesArg.getType()->castAs<PointerType>());
4802   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
4803   Counter = 0;
4804   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
4805     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
4806     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
4807     LValue RefLVal =
4808         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
4809     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
4810         RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
4811     CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
4812     ++Counter;
4813   }
4814   CGF.FinishFunction();
4815   return TaskPrivatesMap;
4816 }
4817 
/// Emit initialization for private variables in task-based directives.
/// Walks \p Privates in lockstep with the fields of the privates record (the
/// second field of kmp_task_t_with_privates, reached through \p TDBase) and
/// emits an initializer for each private copy that has one: a memcpy or
/// element-by-element copy-construction from the captured original for
/// firstprivates, or the declared initializer otherwise.
/// \param KmpTaskSharedsPtr Pointer to the shareds block of the task; may be
///        invalid when no shareds are needed.
/// \param ForDup true when emitting inside the task_dup helper — only
///        non-trivial constructor initialization is (re)emitted there.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates record is the second field of kmp_task_t_with_privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Iterate the privates-record fields in the same order the Privates list
  // was built; the two sequences correspond element-for-element.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // For task_dup (ForDup) only privates with a non-trivial constructor
    // initializer need to be re-run here.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        // Firstprivate: initialize from the captured original value.
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else {
          // Rebuild the lvalue with the declared alignment of the original
          // variable while keeping its TBAA info.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Scalar/class firstprivate: map the init helper to the shared
          // value and emit the copy-initializer.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
            return SharedRefLValue.getAddress();
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private/lastprivate: run the declared initializer.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
4922 
4923 /// Check if duplication function is required for taskloops.
4924 static bool checkInitIsRequired(CodeGenFunction &CGF,
4925                                 ArrayRef<PrivateDataTy> Privates) {
4926   bool InitRequired = false;
4927   for (const PrivateDataTy &Pair : Privates) {
4928     const VarDecl *VD = Pair.second.PrivateCopy;
4929     const Expr *Init = VD->getAnyInitializer();
4930     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
4931                                     !CGF.isTrivialInitializer(Init));
4932     if (InitRequired)
4933       break;
4934   }
4935   return InitRequired;
4936 }
4937 
4938 
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  // Parameters: destination task, source task, lastprivate flag.
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  // Base lvalue for the destination task object.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    // Load the shareds pointer from the *source* task so firstprivates can be
    // copied from the originally captured values. The inner TDBase
    // intentionally shadows the destination base above.
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.getNaturalTypeAlignment(SharedsTy));
  }
  // ForDup=true: only non-trivial constructor initialization is re-emitted.
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
5017 
5018 /// Checks if destructor function is required to be generated.
5019 /// \return true if cleanups are required, false otherwise.
5020 static bool
5021 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
5022   bool NeedsCleanup = false;
5023   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
5024   const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
5025   for (const FieldDecl *FD : PrivateRD->fields()) {
5026     NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
5027     if (NeedsCleanup)
5028       break;
5029   }
5030   return NeedsCleanup;
5031 }
5032 
/// Emit the full task-creation sequence for a task-based directive: build (or
/// reuse) the kmp_task_t record types, emit the helper functions (proxy task
/// entry, privates mapping, destructors, task_dup), call
/// __kmpc_omp_task_alloc (or __kmpc_omp_target_task_alloc), copy the shareds
/// and initialize the private copies in the allocated task descriptor.
/// \return Handles (new task, entry point, typed base lvalue, record decl)
/// needed by the caller to actually launch the task.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  auto I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally record the element-init helper variable used
  // when copy-constructing from the captured original.
  I = Data.FirstprivateCopies.begin();
  auto IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Sort by decreasing alignment; the stable sort preserves declaration order
  // among equally-aligned privates (presumably to minimize record padding).
  llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
    return L.first > R.first;
  });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  // The record type is cached in Saved* members so it is built at most once
  // per module for each flavor (taskloop vs. plain task/target).
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // Type of the 4th parameter of the task entry: the privates-mapping
  // function pointer.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    // No privates - pass a null mapping function.
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  // 'final' may be a runtime condition (pointer set) or a compile-time
  // constant (int), so select or fold accordingly.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    // Nowait target tasks use the target-aware allocator which also takes
    // the device id.
    NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_target_task_alloc), AllocArgs);
  } else {
    NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops additionally need a task_dup helper when there are
    // lastprivates or non-trivially-constructed privates.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
5252 
/// Emits a call of an OpenMP 'task' construct.
///
/// The task object itself is built by emitTaskInit; this routine then:
///  1. packs any 'depend' clause items into a kmp_depend_info array,
///  2. on the "then" path, enqueues the task via
///     __kmpc_omp_task_with_deps / __kmpc_omp_task, and
///  3. on the "else" path (a false 'if' clause), waits on the dependences
///     with __kmpc_omp_wait_deps and runs the task entry serially between
///     __kmpc_omp_task_begin_if0 / __kmpc_omp_task_complete_if0.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  ASTContext &C = CGM.getContext();
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Data.Dependences.size();
  if (NumDependencies) {
    // Dependence kind for RTL ('out' and 'inout' intentionally share one
    // encoding, see the switch below).
    enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3, DepMutexInOutSet = 0x4 };
    // Field order of the kmp_depend_info record built below.
    enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
    RecordDecl *KmpDependInfoRD;
    // Flags are stored as an unsigned integer of boolean bit-width.
    QualType FlagsTy =
        C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
    llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
    if (KmpDependInfoTy.isNull()) {
      // Lazily build and cache the implicit record type:
      //   struct kmp_depend_info { intptr_t base_addr; size_t len; flags; };
      KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
      KmpDependInfoRD->startDefinition();
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
      addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
      KmpDependInfoRD->completeDefinition();
      KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
    } else {
      KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
    }
    // Define type kmp_depend_info[<Dependences.size()>];
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    // kmp_depend_info[<Dependences.size()>] deps;
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    for (unsigned I = 0; I < NumDependencies; ++I) {
      const Expr *E = Data.Dependences[I].second;
      LValue Addr = CGF.EmitLValue(E);
      llvm::Value *Size;
      QualType Ty = E->getType();
      if (const auto *ASE =
              dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
        // For an array section, compute the byte size as
        // (&last_element + 1) - &first_element via pointer-to-integer
        // arithmetic; the subtraction cannot wrap, hence NUW.
        LValue UpAddrLVal =
            CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
        llvm::Value *UpAddr =
            CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
        llvm::Value *LowIntPtr =
            CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
        llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
        Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
      } else {
        Size = CGF.getTypeSize(Ty);
      }
      LValue Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DependenciesArray, I),
          KmpDependInfoTy);
      // deps[i].base_addr = &<Dependences[i].second>;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      CGF.EmitStoreOfScalar(
          CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
          BaseAddrLVal);
      // deps[i].len = sizeof(<Dependences[i].second>);
      LValue LenLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Len));
      CGF.EmitStoreOfScalar(Size, LenLVal);
      // deps[i].flags = <Dependences[i].first>;
      RTLDependenceKindTy DepKind;
      switch (Data.Dependences[I].first) {
      case OMPC_DEPEND_in:
        DepKind = DepIn;
        break;
      // Out and InOut dependencies must use the same code.
      case OMPC_DEPEND_out:
      case OMPC_DEPEND_inout:
        DepKind = DepInOut;
        break;
      case OMPC_DEPEND_mutexinoutset:
        DepKind = DepMutexInOutSet;
        break;
      case OMPC_DEPEND_source:
      case OMPC_DEPEND_sink:
      case OMPC_DEPEND_unknown:
        llvm_unreachable("Unknown task dependence type");
      }
      LValue FlagsLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                            FlagsLVal);
    }
    // Decay the array to a pointer to its first element, cast to void*, for
    // use as the dep_list argument of the runtime calls below.
    DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), CGF.VoidPtrTy);
  }

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (NumDependencies) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
    DepTaskArgs[4] = DependenciesArray.getPointer();
    // No noalias dependence list is emitted: ndeps_noalias = 0, list = null.
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // "Then" path: hand the task to the runtime for (possibly deferred)
  // execution. Note: TaskArgs/DepTaskArgs are captured by reference; this
  // lambda must run before they go out of scope (it does, below).
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
                        &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied tasks start executing at part id 0.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (NumDependencies) {
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (NumDependencies) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // "Else" path (false 'if' clause): execute the task body immediately and
  // serially on the encountering thread.
  auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
                        NumDependencies, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (NumDependencies)
      CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    // No 'if' clause: unconditionally take the enqueue path.
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
5447 
/// Emits a call of an OpenMP 'taskloop' construct.
///
/// The task object is built by emitTaskInit; this routine then evaluates the
/// 'if' clause, seeds the task's lower-bound/upper-bound/stride fields from
/// the loop directive's bound variables, stores the reduction data pointer,
/// and finally emits the __kmpc_taskloop runtime call.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    // No 'if' clause behaves as 'if(1)'.
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // task->lb = <lower bound variable initializer>;
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // task->ub = <upper bound variable initializer>;
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // task->st = <stride variable initializer>;
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    // No reductions: the field is nulled out.
    CGF.EmitNullInitialization(RedLVal.getAddress(),
                               CGF.getContext().VoidPtrTy);
  }
  // Encoding of the 'sched' argument of __kmpc_taskloop.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(),
      UBLVal.getPointer(),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
              CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      // 'sched': Data.Schedule's int flag distinguishes num_tasks from
      // grainsize; no pointer means no schedule clause at all.
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      // 'grainsize': the clause value widened to 64 bits, or 0 if absent.
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      // 'task_dup': optional duplication routine, null when not needed.
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
}
5528 
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr, EExpr, UpExpr Optional expressions forwarded verbatim to
/// \p RedOpGen on every element (used by atomic reduction generators).
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely for a zero-length array.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  // EntryBB is remembered as the incoming edge for the element PHIs below.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI tracking the current RHS element across loop iterations.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // PHI tracking the current LHS element across loop iterations.
  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy. Privatize LHSVar/RHSVar to the current element addresses so
  // the generated reduction operation applies to this element pair.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // Wire the back-edge of both PHIs from wherever the body finished emitting.
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5608 
5609 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5610 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5611 /// UDR combiner function.
5612 static void emitReductionCombiner(CodeGenFunction &CGF,
5613                                   const Expr *ReductionOp) {
5614   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5615     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5616       if (const auto *DRE =
5617               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5618         if (const auto *DRD =
5619                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5620           std::pair<llvm::Function *, llvm::Function *> Reduction =
5621               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5622           RValue Func = RValue::get(Reduction.first);
5623           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5624           CGF.EmitIgnoredExpr(ReductionOp);
5625           return;
5626         }
5627   CGF.EmitIgnoredExpr(ReductionOp);
5628 }
5629 
/// Emits the pairwise reduction function
///   void reduction_func(void *lhs[<n>], void *rhs[<n>])
/// which combines the corresponding elements of the two reduction lists:
///   *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
/// For a variably-modified private type the reduction list carries an extra
/// trailing slot holding the array size; that slot is consumed here to
/// re-emit the VLA type before the combiners are generated.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // First pass: privatize every LHS/RHS variable to its slot in the lists.
  // Idx tracks the list slot and may run ahead of I when VLA size slots are
  // interleaved with the data slots.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type. The size lives in the extra slot
      // following this variable's data slot.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      // Bind the VLA's size expression to the loaded value, then re-emit the
      // type so later array emission sees the correct bounds.
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // Second pass: emit each combiner; whole arrays need an element loop.
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5721 
5722 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5723                                                   const Expr *ReductionOp,
5724                                                   const Expr *PrivateRef,
5725                                                   const DeclRefExpr *LHS,
5726                                                   const DeclRefExpr *RHS) {
5727   if (PrivateRef->getType()->isArrayType()) {
5728     // Emit reduction for array section.
5729     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5730     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5731     EmitOMPAggregateReduction(
5732         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5733         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5734           emitReductionCombiner(CGF, ReductionOp);
5735         });
5736   } else {
5737     // Emit reduction for array subscript or single variable.
5738     emitReductionCombiner(CGF, ReductionOp);
5739   }
5740 }
5741 
5742 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5743                                     ArrayRef<const Expr *> Privates,
5744                                     ArrayRef<const Expr *> LHSExprs,
5745                                     ArrayRef<const Expr *> RHSExprs,
5746                                     ArrayRef<const Expr *> ReductionOps,
5747                                     ReductionOptionsTy Options) {
5748   if (!CGF.HaveInsertPoint())
5749     return;
5750 
5751   bool WithNowait = Options.WithNowait;
5752   bool SimpleReduction = Options.SimpleReduction;
5753 
5754   // Next code should be emitted for reduction:
5755   //
5756   // static kmp_critical_name lock = { 0 };
5757   //
5758   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5759   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5760   //  ...
5761   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5762   //  *(Type<n>-1*)rhs[<n>-1]);
5763   // }
5764   //
5765   // ...
5766   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5767   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5768   // RedList, reduce_func, &<lock>)) {
5769   // case 1:
5770   //  ...
5771   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5772   //  ...
5773   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5774   // break;
5775   // case 2:
5776   //  ...
5777   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5778   //  ...
5779   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5780   // break;
5781   // default:;
5782   // }
5783   //
5784   // if SimpleReduction is true, only the next code is generated:
5785   //  ...
5786   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5787   //  ...
5788 
5789   ASTContext &C = CGM.getContext();
5790 
5791   if (SimpleReduction) {
5792     CodeGenFunction::RunCleanupsScope Scope(CGF);
5793     auto IPriv = Privates.begin();
5794     auto ILHS = LHSExprs.begin();
5795     auto IRHS = RHSExprs.begin();
5796     for (const Expr *E : ReductionOps) {
5797       emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5798                                   cast<DeclRefExpr>(*IRHS));
5799       ++IPriv;
5800       ++ILHS;
5801       ++IRHS;
5802     }
5803     return;
5804   }
5805 
5806   // 1. Build a list of reduction variables.
5807   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5808   auto Size = RHSExprs.size();
5809   for (const Expr *E : Privates) {
5810     if (E->getType()->isVariablyModifiedType())
5811       // Reserve place for array size.
5812       ++Size;
5813   }
5814   llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5815   QualType ReductionArrayTy =
5816       C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
5817                              /*IndexTypeQuals=*/0);
5818   Address ReductionList =
5819       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5820   auto IPriv = Privates.begin();
5821   unsigned Idx = 0;
5822   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5823     Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5824     CGF.Builder.CreateStore(
5825         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5826             CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
5827         Elem);
5828     if ((*IPriv)->getType()->isVariablyModifiedType()) {
5829       // Store array size.
5830       ++Idx;
5831       Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5832       llvm::Value *Size = CGF.Builder.CreateIntCast(
5833           CGF.getVLASize(
5834                  CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5835               .NumElts,
5836           CGF.SizeTy, /*isSigned=*/false);
5837       CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5838                               Elem);
5839     }
5840   }
5841 
5842   // 2. Emit reduce_func().
5843   llvm::Function *ReductionFn = emitReductionFunction(
5844       Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
5845       LHSExprs, RHSExprs, ReductionOps);
5846 
5847   // 3. Create static kmp_critical_name lock = { 0 };
5848   std::string Name = getName({"reduction"});
5849   llvm::Value *Lock = getCriticalRegionLock(Name);
5850 
5851   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5852   // RedList, reduce_func, &<lock>);
5853   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5854   llvm::Value *ThreadId = getThreadID(CGF, Loc);
5855   llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5856   llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5857       ReductionList.getPointer(), CGF.VoidPtrTy);
5858   llvm::Value *Args[] = {
5859       IdentTLoc,                             // ident_t *<loc>
5860       ThreadId,                              // i32 <gtid>
5861       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5862       ReductionArrayTySize,                  // size_type sizeof(RedList)
5863       RL,                                    // void *RedList
5864       ReductionFn, // void (*) (void *, void *) <reduce_func>
5865       Lock         // kmp_critical_name *&<lock>
5866   };
5867   llvm::Value *Res = CGF.EmitRuntimeCall(
5868       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
5869                                        : OMPRTL__kmpc_reduce),
5870       Args);
5871 
5872   // 5. Build switch(res)
5873   llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5874   llvm::SwitchInst *SwInst =
5875       CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5876 
5877   // 6. Build case 1:
5878   //  ...
5879   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5880   //  ...
5881   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5882   // break;
5883   llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5884   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5885   CGF.EmitBlock(Case1BB);
5886 
5887   // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5888   llvm::Value *EndArgs[] = {
5889       IdentTLoc, // ident_t *<loc>
5890       ThreadId,  // i32 <gtid>
5891       Lock       // kmp_critical_name *&<lock>
5892   };
5893   auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5894                        CodeGenFunction &CGF, PrePostActionTy &Action) {
5895     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5896     auto IPriv = Privates.begin();
5897     auto ILHS = LHSExprs.begin();
5898     auto IRHS = RHSExprs.begin();
5899     for (const Expr *E : ReductionOps) {
5900       RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5901                                      cast<DeclRefExpr>(*IRHS));
5902       ++IPriv;
5903       ++ILHS;
5904       ++IRHS;
5905     }
5906   };
5907   RegionCodeGenTy RCG(CodeGen);
5908   CommonActionTy Action(
5909       nullptr, llvm::None,
5910       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
5911                                        : OMPRTL__kmpc_end_reduce),
5912       EndArgs);
5913   RCG.setAction(Action);
5914   RCG(CGF);
5915 
5916   CGF.EmitBranch(DefaultBB);
5917 
5918   // 7. Build case 2:
5919   //  ...
5920   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5921   //  ...
5922   // break;
5923   llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5924   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5925   CGF.EmitBlock(Case2BB);
5926 
5927   auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5928                              CodeGenFunction &CGF, PrePostActionTy &Action) {
5929     auto ILHS = LHSExprs.begin();
5930     auto IRHS = RHSExprs.begin();
5931     auto IPriv = Privates.begin();
5932     for (const Expr *E : ReductionOps) {
5933       const Expr *XExpr = nullptr;
5934       const Expr *EExpr = nullptr;
5935       const Expr *UpExpr = nullptr;
5936       BinaryOperatorKind BO = BO_Comma;
5937       if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5938         if (BO->getOpcode() == BO_Assign) {
5939           XExpr = BO->getLHS();
5940           UpExpr = BO->getRHS();
5941         }
5942       }
5943       // Try to emit update expression as a simple atomic.
5944       const Expr *RHSExpr = UpExpr;
5945       if (RHSExpr) {
5946         // Analyze RHS part of the whole expression.
5947         if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5948                 RHSExpr->IgnoreParenImpCasts())) {
5949           // If this is a conditional operator, analyze its condition for
5950           // min/max reduction operator.
5951           RHSExpr = ACO->getCond();
5952         }
5953         if (const auto *BORHS =
5954                 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5955           EExpr = BORHS->getRHS();
5956           BO = BORHS->getOpcode();
5957         }
5958       }
5959       if (XExpr) {
5960         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5961         auto &&AtomicRedGen = [BO, VD,
5962                                Loc](CodeGenFunction &CGF, const Expr *XExpr,
5963                                     const Expr *EExpr, const Expr *UpExpr) {
5964           LValue X = CGF.EmitLValue(XExpr);
5965           RValue E;
5966           if (EExpr)
5967             E = CGF.EmitAnyExpr(EExpr);
5968           CGF.EmitOMPAtomicSimpleUpdateExpr(
5969               X, E, BO, /*IsXLHSInRHSPart=*/true,
5970               llvm::AtomicOrdering::Monotonic, Loc,
5971               [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5972                 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5973                 PrivateScope.addPrivate(
5974                     VD, [&CGF, VD, XRValue, Loc]() {
5975                       Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5976                       CGF.emitOMPSimpleStore(
5977                           CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5978                           VD->getType().getNonReferenceType(), Loc);
5979                       return LHSTemp;
5980                     });
5981                 (void)PrivateScope.Privatize();
5982                 return CGF.EmitAnyExpr(UpExpr);
5983               });
5984         };
5985         if ((*IPriv)->getType()->isArrayType()) {
5986           // Emit atomic reduction for array section.
5987           const auto *RHSVar =
5988               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5989           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5990                                     AtomicRedGen, XExpr, EExpr, UpExpr);
5991         } else {
5992           // Emit atomic reduction for array subscript or single variable.
5993           AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5994         }
5995       } else {
5996         // Emit as a critical region.
5997         auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5998                                            const Expr *, const Expr *) {
5999           CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6000           std::string Name = RT.getName({"atomic_reduction"});
6001           RT.emitCriticalRegion(
6002               CGF, Name,
6003               [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
6004                 Action.Enter(CGF);
6005                 emitReductionCombiner(CGF, E);
6006               },
6007               Loc);
6008         };
6009         if ((*IPriv)->getType()->isArrayType()) {
6010           const auto *LHSVar =
6011               cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
6012           const auto *RHSVar =
6013               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
6014           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
6015                                     CritRedGen);
6016         } else {
6017           CritRedGen(CGF, nullptr, nullptr, nullptr);
6018         }
6019       }
6020       ++ILHS;
6021       ++IRHS;
6022       ++IPriv;
6023     }
6024   };
6025   RegionCodeGenTy AtomicRCG(AtomicCodeGen);
6026   if (!WithNowait) {
6027     // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
6028     llvm::Value *EndArgs[] = {
6029         IdentTLoc, // ident_t *<loc>
6030         ThreadId,  // i32 <gtid>
6031         Lock       // kmp_critical_name *&<lock>
6032     };
6033     CommonActionTy Action(nullptr, llvm::None,
6034                           createRuntimeFunction(OMPRTL__kmpc_end_reduce),
6035                           EndArgs);
6036     AtomicRCG.setAction(Action);
6037     AtomicRCG(CGF);
6038   } else {
6039     AtomicRCG(CGF);
6040   }
6041 
6042   CGF.EmitBranch(DefaultBB);
6043   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
6044 }
6045 
6046 /// Generates unique name for artificial threadprivate variables.
6047 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
6048 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
6049                                       const Expr *Ref) {
6050   SmallString<256> Buffer;
6051   llvm::raw_svector_ostream Out(Buffer);
6052   const clang::DeclRefExpr *DE;
6053   const VarDecl *D = ::getBaseDecl(Ref, DE);
6054   if (!D)
6055     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
6056   D = D->getCanonicalDecl();
6057   std::string Name = CGM.getOpenMPRuntime().getName(
6058       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
6059   Out << Prefix << Name << "_"
6060       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
6061   return Out.str();
6062 }
6063 
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
/// \param Loc Source location attributed to the generated function.
/// \param RCG Reduction codegen helper describing all reduction items.
/// \param N Index of the reduction item this initializer is emitted for.
/// \returns The newly created internal-linkage initializer function.
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // Single void* parameter: the runtime passes a pointer to the private copy.
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue SharedLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer)
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr =
        CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
            CGF, CGM.getContext().VoidPtrTy,
            generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // No custom initializer: the original item's address is not needed, so
    // pass a null lvalue placeholder.
    SharedLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
6130 
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
/// \param N Index of the reduction item this combiner is emitted for.
/// \param ReductionOp The reduction expression combining LHS and RHS.
/// \param LHS DeclRefExpr for the in/out (accumulator) operand.
/// \param RHS DeclRefExpr for the in (incoming) operand.
/// \param PrivateRef Reference to the private copy of the reduction item.
/// \returns The newly created internal-linkage combiner function.
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  // Two void* parameters: in/out accumulator and incoming value.
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
6208 
/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
/// \param N Index of the reduction item this finalizer is emitted for.
/// \returns The finalizer function, or nullptr if the reduction item's type
/// needs no cleanups.
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // Skip emission entirely for types without destructors/cleanups.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  // Single void* parameter: the runtime passes a pointer to the private copy.
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction();
  return Fn;
}
6257 
// Emits the initialization of task reductions: builds an array of
// kmp_task_red_input_t records (one per reduction item, each carrying the
// shared address, size, and init/fini/comb callbacks) and passes it to
// __kmpc_task_reduction_init, returning the taskgroup reduction descriptor.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  // Nothing to emit without an insert point or without reduction items.
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_task_red_input {
  //   void *reduce_shar; // shared reduction item
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_task_red_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
                       Data.ReductionOps);
  // Fill one array element per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer());
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs, array sections and
    // custom reduction initializations. It is required because runtime does not
    // provide the way to pass the sizes of VLAs/array sections to
    // initializer/combiner/finalizer functions and does not pass the pointer to
    // original reduction item to the initializer. Instead threadprivate global
    // variables are used to store these values and use them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    // emitReduceFiniFunction returns nullptr when no cleanups are needed; the
    // runtime expects a null pointer in that case.
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      // Flag value 1 tells the runtime this item uses lazy (delayed) creation.
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
  }
  // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
}
6362 
// Stores the values that the reduction init/comb/fini callbacks cannot
// receive as arguments (non-constant item sizes and, for custom initializers,
// the original item's address) into artificial threadprivate globals whose
// names match the ones read back by emitReduce{Init,Comb,Fini}Function.
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second = nullptr).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
  // Store address of the original reduction item if custom initializer is used.
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().VoidPtrTy,
        generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy),
        SharedAddr, /*IsVolatile=*/false);
  }
}
6389 
// Returns the address of this thread's private copy of a task reduction item,
// obtained from the runtime via the taskgroup descriptor \p ReductionsPtr and
// the shared item's address. The result reuses the shared item's alignment.
Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      ReductionsPtr,
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(),
                                                      CGM.VoidPtrTy)};
  return Address(
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
      SharedLVal.getAlignment());
}
6407 
// Emits a call to __kmpc_omp_taskwait for the '#pragma omp taskwait'
// directive, plus the untied-task switch bookkeeping for the enclosing region.
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // Ignore return result until untied tasks are supported.
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
  // Let the enclosing OpenMP region emit its untied-task resume point, if any.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
6420 
// Emits the body of a directive that needs no outlined function ('for',
// 'sections', 'atomic', ...) inline in the current function, under an
// InlinedOpenMPRegionRAII that temporarily installs the region info.
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}
6430 
namespace {
/// Cancellation kinds understood by the OpenMP runtime. The explicit numeric
/// values are passed as the cncl_kind argument of __kmpc_cancel and
/// __kmpc_cancellationpoint, so they must not be renumbered.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace
6440 
6441 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6442   RTCancelKind CancelKind = CancelNoreq;
6443   if (CancelRegion == OMPD_parallel)
6444     CancelKind = CancelParallel;
6445   else if (CancelRegion == OMPD_for)
6446     CancelKind = CancelLoop;
6447   else if (CancelRegion == OMPD_sections)
6448     CancelKind = CancelSections;
6449   else {
6450     assert(CancelRegion == OMPD_taskgroup);
6451     CancelKind = CancelTaskgroup;
6452   }
6453   return CancelKind;
6454 }
6455 
// Emits '#pragma omp cancellation point': calls __kmpc_cancellationpoint and,
// if it reports an active cancellation, branches out of the construct through
// the region's cancel destination (running any pending cleanups).
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Non-zero result means cancellation was requested for this region.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
6490 
// Emits '#pragma omp cancel': calls __kmpc_cancel (optionally guarded by the
// 'if' clause condition) and branches out of the construct when the runtime
// reports that cancellation is in effect.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
                                                        PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Non-zero result means this thread must leave the construct.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // 'if' clause present: only cancel when the condition evaluates to true.
      emitOMPIfClause(CGF, IfCond, ThenGen,
                      [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6532 
// Records that this module emits at least one target region, then delegates
// the actual outlining to emitTargetOutlinedFunctionHelper.
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  HasEmittedTargetRegion = true;
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}
6542 
// Outlines the captured statement of a target region into a function with a
// location-derived unique name, and (for offload entries) creates the region
// ID and registers the entry with the offload-entries manager.
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also mark
  // the outlined function to have external linkage in case we are emitting code
  // for the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
  } else {
    // Host side: a unique dummy global serves as the region ID.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}
6609 
6610 /// Checks if the expression is constant or does not have non-trivial function
6611 /// calls.
6612 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6613   // We can skip constant expressions.
6614   // We can skip expressions with trivial calls or simple expressions.
6615   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6616           !E->hasNonTrivialCall(Ctx)) &&
6617          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6618 }
6619 
/// Walks \p Body and returns its single "interesting" child statement, if
/// there is exactly one; returns nullptr when there are several, and may
/// return the body itself when it is not a compound statement. Trivial
/// expressions, no-op statements and effect-free declarations are ignored
/// while searching.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  // Strip wrapping containers (captured statements etc.) first.
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      // Trivial expressions contribute nothing and are skipped.
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
              // Declarations with no runtime effect can be ignored.
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              // Variables of trivial or reference type with trivial (or no)
              // initializers can also be ignored.
              return VD->isConstexpr() ||
                     ((VD->getType().isTrivialType(Ctx) ||
                       VD->getType()->isReferenceType()) &&
                      (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    // Recurse into the single remaining child, stripping containers again.
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6664 
/// Emit the number of teams for a target directive.  Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit a team of size one for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': look for a single directive nested directly inside the
    // captured region to determine the number of teams.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          // Emit the num_teams expression of the nested teams directive in
          // the context of the captured statement.
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        // Nested teams without a num_teams clause: let the runtime decide.
        return Bld.getInt32(0);
      }
      // A nested parallel or simd directive implies a single team.
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      return Bld.getInt32(0);
    }
    // No single nested directive could be determined statically.
    return nullptr;
  }
  // Combined target+teams constructs: the num_teams clause, if present, is
  // attached to the directive itself.
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    // No num_teams clause: let the runtime decide.
    return Bld.getInt32(0);
  }
  // Target constructs without an associated teams construct always execute
  // with a single team.
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  // The remaining directive kinds are not target-based executable directives
  // and must not reach this function (guarded by the assertion above).
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
6786 
/// Compute the number of threads for a parallel region nested directly
/// inside the captured statement \a CS, if one can be found statically.
///
/// Returns 1 for a nested simd-only construct, the combination of the
/// nested if/num_threads clauses with \a DefaultThreadLimitVal for a nested
/// parallel construct, and otherwise \a DefaultThreadLimitVal itself (or a
/// zero constant when it is null and no nested directive was found).
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        // Pick the if clause that applies to 'parallel' (or carries no name
        // modifier).
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Statically false condition: the region runs with one thread.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Emit helper variables from the condition's pre-init statement
            // before evaluating the condition itself.
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        // Emit helper variables from the num_threads pre-init statement, if
        // any.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp to the default thread limit, if one was supplied:
        // min(DefaultThreadLimitVal, NumThreads).
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    // A simd-only region executes with a single thread.
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}
6878 
/// Emit the number of threads for a target directive.  Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': try to derive the thread count from a directly nested
    // parallel/simd region first.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        // Emit the thread_limit expression of the nested directive in the
        // context of the captured statement.
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        // Emit helper variables from the thread_limit pre-init statement.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // For a nested teams (non-distribute) directive, descend one more
      // level to look for the innermost directive.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      // A nested non-simd distribute: derive the count from its captured
      // region, honoring the thread_limit computed above.
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      // A nested simd-only region executes with a single thread.
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    // thread_limit on the combined directive itself, if present.
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // Descend into a nested plain 'distribute' region.
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Statically false condition: one thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Result is min(num_threads, thread_limit) when both are present.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  // simd-only target regions execute with a single thread.
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  // The remaining directive kinds are not target-based executable directives
  // and must not reach this function (guarded by the assertion above).
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7092 
7093 namespace {
7094 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7095 
7096 // Utility to handle information from clauses associated with a given
7097 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7098 // It provides a convenient interface to obtain the information and generate
7099 // code for that information.
7100 class MappableExprsHandler {
7101 public:
7102   /// Values for bit flags used to specify the mapping type for
7103   /// offloading.
7104   enum OpenMPOffloadMappingFlags : uint64_t {
7105     /// No flags
7106     OMP_MAP_NONE = 0x0,
7107     /// Allocate memory on the device and move data from host to device.
7108     OMP_MAP_TO = 0x01,
7109     /// Allocate memory on the device and move data from device to host.
7110     OMP_MAP_FROM = 0x02,
7111     /// Always perform the requested mapping action on the element, even
7112     /// if it was already mapped before.
7113     OMP_MAP_ALWAYS = 0x04,
7114     /// Delete the element from the device environment, ignoring the
7115     /// current reference count associated with the element.
7116     OMP_MAP_DELETE = 0x08,
7117     /// The element being mapped is a pointer-pointee pair; both the
7118     /// pointer and the pointee should be mapped.
7119     OMP_MAP_PTR_AND_OBJ = 0x10,
7120     /// This flags signals that the base address of an entry should be
7121     /// passed to the target kernel as an argument.
7122     OMP_MAP_TARGET_PARAM = 0x20,
7123     /// Signal that the runtime library has to return the device pointer
7124     /// in the current position for the data being mapped. Used when we have the
7125     /// use_device_ptr clause.
7126     OMP_MAP_RETURN_PARAM = 0x40,
7127     /// This flag signals that the reference being passed is a pointer to
7128     /// private data.
7129     OMP_MAP_PRIVATE = 0x80,
7130     /// Pass the element to the device by value.
7131     OMP_MAP_LITERAL = 0x100,
7132     /// Implicit map
7133     OMP_MAP_IMPLICIT = 0x200,
7134     /// Close is a hint to the runtime to allocate memory close to
7135     /// the target device.
7136     OMP_MAP_CLOSE = 0x400,
7137     /// The 16 MSBs of the flags indicate whether the entry is member of some
7138     /// struct/class.
7139     OMP_MAP_MEMBER_OF = 0xffff000000000000,
7140     LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
7141   };
7142 
7143   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7144   static unsigned getFlagMemberOffset() {
7145     unsigned Offset = 0;
7146     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7147          Remain = Remain >> 1)
7148       Offset++;
7149     return Offset;
7150   }
7151 
  /// Class that associates information with a base pointer to be passed to the
  /// runtime library.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    /// Access the wrapped base pointer value.
    llvm::Value *operator*() const { return Ptr; }
    /// Get the declaration of the device pointer, if any.
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    /// Associate a device pointer declaration with this base pointer.
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };
7168 
7169   using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
7170   using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
7171   using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
7172 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Field index and address of the lowest mapped element of the struct.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Field index and address of the highest mapped element of the struct.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Base address of the struct itself.
    Address Base = Address::invalid();
  };
7184 
7185 private:
  /// Information gathered from a map-like clause for a single mappable
  /// expression: the expression's components, the map type and modifiers,
  /// and whether the corresponding device pointer must be returned.
  struct MapInfo {
    /// Components of the mappable expression (base plus member/array
    /// accesses).
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    /// The map type (to/from/tofrom/alloc/release/delete).
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    /// Map-type modifiers (e.g. always, close) attached to the clause.
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    /// True if the runtime must return the device pointer for this entry
    /// (use_device_ptr handling).
    bool ReturnDevicePointer = false;
    /// True if this mapping was generated implicitly rather than written by
    /// the user.
    bool IsImplicit = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
  };
7203 
  /// If use_device_ptr is used on a pointer which is a struct member and there
  /// is no map information about it, then emission of that entry is deferred
  /// until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    /// The member expression whose device pointer is requested.
    const Expr *IE = nullptr;
    /// The declaration named by the use_device_ptr clause.
    const ValueDecl *VD = nullptr;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
        : IE(IE), VD(VD) {}
  };
7214 
7215   /// The target directive from where the mappable clauses were extracted. It
7216   /// is either a executable directive or a user-defined mapper directive.
7217   llvm::PointerUnion<const OMPExecutableDirective *,
7218                      const OMPDeclareMapperDecl *>
7219       CurDir;
7220 
7221   /// Function the directive is being generated for.
7222   CodeGenFunction &CGF;
7223 
7224   /// Set of all first private variables in the current directive.
7225   /// bool data is set to true if the variable is implicitly marked as
7226   /// firstprivate, false otherwise.
7227   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
7228 
7229   /// Map between device pointer declarations and their expression components.
7230   /// The key value for declarations in 'this' is null.
7231   llvm::DenseMap<
7232       const ValueDecl *,
7233       SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7234       DevPointersMap;
7235 
  /// Emit an llvm::Value holding the size of the data designated by the
  /// expression \a E. For array sections the size is computed from the
  /// section length and element size; otherwise it is the size of the
  /// expression's (pointee, for references) type.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression, that means we
      // are using the whole length of the base.
      if (!OAE->getLength() && OAE->getColonLoc().isValid())
        return CGF.getTypeSize(BaseTy);

      // Determine the element size from the pointee (for pointer bases) or
      // the element type (for array bases).
      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength())
        return ElemSize;

      // Size of the section is length * element size.
      llvm::Value *LengthVal = CGF.EmitScalarExpr(OAE->getLength());
      LengthVal =
          CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false);
      return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
    }
    return CGF.getTypeSize(ExprTy);
  }
7277 
7278   /// Return the corresponding bits for a given map clause modifier. Add
7279   /// a flag marking the map as a pointer if requested. Add a flag marking the
7280   /// map as the first one of a series of maps that relate to the same map
7281   /// expression.
7282   OpenMPOffloadMappingFlags getMapTypeBits(
7283       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7284       bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
7285     OpenMPOffloadMappingFlags Bits =
7286         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7287     switch (MapType) {
7288     case OMPC_MAP_alloc:
7289     case OMPC_MAP_release:
7290       // alloc and release is the default behavior in the runtime library,  i.e.
7291       // if we don't pass any bits alloc/release that is what the runtime is
7292       // going to do. Therefore, we don't need to signal anything for these two
7293       // type modifiers.
7294       break;
7295     case OMPC_MAP_to:
7296       Bits |= OMP_MAP_TO;
7297       break;
7298     case OMPC_MAP_from:
7299       Bits |= OMP_MAP_FROM;
7300       break;
7301     case OMPC_MAP_tofrom:
7302       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7303       break;
7304     case OMPC_MAP_delete:
7305       Bits |= OMP_MAP_DELETE;
7306       break;
7307     case OMPC_MAP_unknown:
7308       llvm_unreachable("Unexpected map type!");
7309     }
7310     if (AddPtrFlag)
7311       Bits |= OMP_MAP_PTR_AND_OBJ;
7312     if (AddIsTargetParamFlag)
7313       Bits |= OMP_MAP_TARGET_PARAM;
7314     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7315         != MapModifiers.end())
7316       Bits |= OMP_MAP_ALWAYS;
7317     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7318         != MapModifiers.end())
7319       Bits |= OMP_MAP_CLOSE;
7320     return Bits;
7321   }
7322 
7323   /// Return true if the provided expression is a final array section. A
7324   /// final array section, is one whose length can't be proved to be one.
7325   bool isFinalArraySectionExpression(const Expr *E) const {
7326     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7327 
7328     // It is not an array section and therefore not a unity-size one.
7329     if (!OASE)
7330       return false;
7331 
7332     // An array section with no colon always refer to a single element.
7333     if (OASE->getColonLoc().isInvalid())
7334       return false;
7335 
7336     const Expr *Length = OASE->getLength();
7337 
7338     // If we don't have a length we have to check if the array has size 1
7339     // for this dimension. Also, we should always expect a length if the
7340     // base type is pointer.
7341     if (!Length) {
7342       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7343                              OASE->getBase()->IgnoreParenImpCasts())
7344                              .getCanonicalType();
7345       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7346         return ATy->getSize().getSExtValue() != 1;
7347       // If we don't have a constant dimension length, we have to consider
7348       // the current section as having any size, so it is not necessarily
7349       // unitary. If it happen to be unity size, that's user fault.
7350       return true;
7351     }
7352 
7353     // Check if the length evaluates to 1.
7354     Expr::EvalResult Result;
7355     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7356       return true; // Can have more that size 1.
7357 
7358     llvm::APSInt ConstLength = Result.Val.getInt();
7359     return ConstLength.getSExtValue() != 1;
7360   }
7361 
  /// Generate the base pointers, section pointers, sizes and map type
  /// bits for the provided map type, map modifier, and expression components.
  /// \a IsFirstComponentList should be set to true if the provided set of
  /// components is the first associated with a capture.
  /// When \a OverlappedElements is non-empty, the base element is emitted
  /// piecewise so the regions covered by the overlapped components are
  /// skipped (they are mapped by their own entries).
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType,
      ArrayRef<OpenMPMapModifierKind> MapModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
      StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
      bool IsImplicit,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = llvm::None) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (**), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    // When the variable is on a declare target link or in a to clause with
    // unified memory, a reference is needed to hold the host/device address
    // of the variable.
    bool RequiresReference = false;

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      // Array access / section rooted at 'this': the lvalue address is the
      // base.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
            // Use the runtime-provided reference for declare-target link /
            // unified-shared-memory 'to' variables.
            RequiresReference = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
          }
        }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());

        // We do not need to generate individual map information for the
        // pointer, it can be associated with the combined storage.
        ++I;
      }
    }

    // Track whether a component of the list should be marked as MEMBER_OF some
    // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
    // in a component list should be marked as MEMBER_OF, all subsequent entries
    // do not belong to the base struct. E.g.
    // struct S2 s;
    // s.ps->ps->ps->f[:]
    //   (1) (2) (3) (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
    // is the pointee of ps(2) which is not member of struct s, so it should not
    // be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a component
    // in the component list which is a member expression. Useful when we have a
    // pointer or a final array section, in which case it is the previous
    // component in the list which tells us whether we have a member expression.
    // E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells us
    // whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    // Walk the remaining components; an entry is emitted each time we reach
    // the end of the list, a pointer component, or a final array section.
    for (; I != CE; ++I) {
      // If the current component is member of a struct (parent struct) mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME)
          ShouldBeMemberOf = true;
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section, is one whose length can't be proved to be one.
      bool IsFinalArraySection =
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      bool IsPointer =
          (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();

      if (Next == CE || IsPointer || IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        Address LB =
            CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress();

        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the object
        // it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointer =
            IsPointer && EncounteredME &&
            (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
             EncounteredME);
        if (!OverlappedElements.empty()) {
          // Handle base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(Next == CE &&
                 "Expected last element for the overlapped elements.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on the
          // device.
          PartialStruct.LowestElem = {0, LB};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          // HB points at the last byte of the element; field index max() makes
          // it compare greater than any real field index.
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
                                                              CGF.VoidPtrTy),
              TypeSize.getQuantity() - 1);
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          // Emit data for non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapModifiers, IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false);
          LB = BP;
          llvm::Value *Size = nullptr;
          // Do bitcopy of all non-overlapped structure elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (MC.getAssociatedDeclaration()) {
                // Size of this gap is the distance from the current lower
                // bound to the start of the overlapped component.
                ComponentLB =
                    CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                        .getAddress();
                Size = CGF.Builder.CreatePtrDiff(
                    CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
                    CGF.EmitCastToVoidPtr(LB.getPointer()));
                break;
              }
            }
            BasePointers.push_back(BP.getPointer());
            Pointers.push_back(LB.getPointer());
            Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty,
                                                      /*isSigned=*/true));
            Types.push_back(Flags);
            // Resume copying just past the overlapped component.
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
          }
          // Emit the trailing region, from the last overlapped component to
          // one past the highest byte of the element.
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Size = CGF.Builder.CreatePtrDiff(
              CGF.EmitCastToVoidPtr(
                  CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
              CGF.EmitCastToVoidPtr(LB.getPointer()));
          Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          Types.push_back(Flags);
          break;
        }
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        if (!IsMemberPointer) {
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags = getMapTypeBits(
              MapType, MapModifiers, IsImplicit,
              !IsExpressionFirstInfo || RequiresReference,
              IsCaptureFirstInfo && !RequiresReference);

          if (!IsExpressionFirstInfo) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
            // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
            if (IsPointer)
              Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
                         OMP_MAP_DELETE | OMP_MAP_CLOSE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
              // should be later updated with the correct value of MEMBER_OF.
              Flags |= OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          Types.push_back(Flags);
        }

        // If we have encountered a member expression so far, keep track of the
        // mapped member. If the parent is "*this", then the value declaration
        // is nullptr.
        if (EncounteredME) {
          // NOTE(review): getMemberDecl() is assumed to be a FieldDecl here;
          // a non-field member would leave FD null and crash below — confirm
          // callers never map non-field members.
          const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this struct
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LB};
            PartialStruct.HighestElem = {FieldIndex, LB};
            PartialStruct.Base = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LB};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            PartialStruct.HighestElem = {FieldIndex, LB};
          }
        }

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
      }
    }
  }
7798 
7799   /// Return the adjusted map modifiers if the declaration a capture refers to
7800   /// appears in a first-private clause. This is expected to be used only with
7801   /// directives that start with 'target'.
7802   MappableExprsHandler::OpenMPOffloadMappingFlags
7803   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7804     assert(Cap.capturesVariable() && "Expected capture by reference only!");
7805 
7806     // A first private variable captured by reference will use only the
7807     // 'private ptr' and 'map to' flag. Return the right flags if the captured
7808     // declaration is known as first-private in this handler.
7809     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7810       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
7811           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
7812         return MappableExprsHandler::OMP_MAP_ALWAYS |
7813                MappableExprsHandler::OMP_MAP_TO;
7814       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7815         return MappableExprsHandler::OMP_MAP_TO |
7816                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
7817       return MappableExprsHandler::OMP_MAP_PRIVATE |
7818              MappableExprsHandler::OMP_MAP_TO;
7819     }
7820     return MappableExprsHandler::OMP_MAP_TO |
7821            MappableExprsHandler::OMP_MAP_FROM;
7822   }
7823 
7824   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
7825     // Rotate by getFlagMemberOffset() bits.
7826     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
7827                                                   << getFlagMemberOffset());
7828   }
7829 
7830   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
7831                                      OpenMPOffloadMappingFlags MemberOfFlag) {
7832     // If the entry is PTR_AND_OBJ but has not been marked with the special
7833     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
7834     // marked as MEMBER_OF.
7835     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
7836         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
7837       return;
7838 
7839     // Reset the placeholder value to prepare the flag for the assignment of the
7840     // proper MEMBER_OF value.
7841     Flags &= ~OMP_MAP_MEMBER_OF;
7842     Flags |= MemberOfFlag;
7843   }
7844 
7845   void getPlainLayout(const CXXRecordDecl *RD,
7846                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7847                       bool AsBase) const {
7848     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7849 
7850     llvm::StructType *St =
7851         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7852 
7853     unsigned NumElements = St->getNumElements();
7854     llvm::SmallVector<
7855         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7856         RecordLayout(NumElements);
7857 
7858     // Fill bases.
7859     for (const auto &I : RD->bases()) {
7860       if (I.isVirtual())
7861         continue;
7862       const auto *Base = I.getType()->getAsCXXRecordDecl();
7863       // Ignore empty bases.
7864       if (Base->isEmpty() || CGF.getContext()
7865                                  .getASTRecordLayout(Base)
7866                                  .getNonVirtualSize()
7867                                  .isZero())
7868         continue;
7869 
7870       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7871       RecordLayout[FieldIndex] = Base;
7872     }
7873     // Fill in virtual bases.
7874     for (const auto &I : RD->vbases()) {
7875       const auto *Base = I.getType()->getAsCXXRecordDecl();
7876       // Ignore empty bases.
7877       if (Base->isEmpty())
7878         continue;
7879       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7880       if (RecordLayout[FieldIndex])
7881         continue;
7882       RecordLayout[FieldIndex] = Base;
7883     }
7884     // Fill in all the fields.
7885     assert(!RD->isUnion() && "Unexpected union.");
7886     for (const auto *Field : RD->fields()) {
7887       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
7888       // will fill in later.)
7889       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
7890         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
7891         RecordLayout[FieldIndex] = Field;
7892       }
7893     }
7894     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
7895              &Data : RecordLayout) {
7896       if (Data.isNull())
7897         continue;
7898       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
7899         getPlainLayout(Base, Layout, /*AsBase=*/true);
7900       else
7901         Layout.push_back(Data.get<const FieldDecl *>());
7902     }
7903   }
7904 
7905 public:
7906   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
7907       : CurDir(&Dir), CGF(CGF) {
7908     // Extract firstprivate clause information.
7909     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
7910       for (const auto *D : C->varlists())
7911         FirstPrivateDecls.try_emplace(
7912             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
7913     // Extract device pointer clause information.
7914     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
7915       for (auto L : C->component_lists())
7916         DevPointersMap[L.first].push_back(L.second);
7917   }
7918 
  /// Constructor for the declare mapper directive. Unlike the executable
  /// directive constructor, no firstprivate or device-pointer clause
  /// information is collected here.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
7922 
7923   /// Generate code for the combined entry if we have a partially mapped struct
7924   /// and take care of the mapping flags of the arguments corresponding to
7925   /// individual struct members.
7926   void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
7927                          MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7928                          MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
7929                          const StructRangeInfoTy &PartialStruct) const {
7930     // Base is the base of the struct
7931     BasePointers.push_back(PartialStruct.Base.getPointer());
7932     // Pointer is the address of the lowest element
7933     llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
7934     Pointers.push_back(LB);
7935     // Size is (addr of {highest+1} element) - (addr of lowest element)
7936     llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
7937     llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
7938     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
7939     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
7940     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
7941     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
7942                                                   /*isSigned=*/false);
7943     Sizes.push_back(Size);
7944     // Map type is always TARGET_PARAM
7945     Types.push_back(OMP_MAP_TARGET_PARAM);
7946     // Remove TARGET_PARAM flag from the first element
7947     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
7948 
7949     // All other current entries will be MEMBER_OF the combined entry
7950     // (except for PTR_AND_OBJ entries which do not have a placeholder value
7951     // 0xFFFF in the MEMBER_OF field).
7952     OpenMPOffloadMappingFlags MemberOfFlag =
7953         getMemberOfFlag(BasePointers.size() - 1);
7954     for (auto &M : CurTypes)
7955       setCorrectMemberOfFlag(M, MemberOfFlag);
7956   }
7957 
  /// Generate all the base pointers, section pointers, sizes and map
  /// types for the extracted mappable expressions. Also, for each item that
  /// relates with a device pointer, a pair of the relevant declaration and
  /// index where it occurs is appended to the device pointers info array.
  void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
                       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                       MapFlagsArrayTy &Types) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map. The MapVector
    // preserves insertion order, so entries are emitted in the order the
    // declarations were first seen.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses. The declaration is canonicalized so every component list for
    // the same entity lands in the same bucket; a null declaration keys the
    // component lists rooted at 'this'.
    auto &&InfoGen = [&Info](
        const ValueDecl *D,
        OMPClauseMappableExprCommon::MappableExprComponentListRef L,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit) {
      const ValueDecl *VD =
          D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
      Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
                            IsImplicit);
    };

    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    // Collect component lists from 'map' clauses; 'to'/'from' motion clauses
    // are recorded as plain 'to'/'from' maps with no modifiers.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
      for (const auto &L : C->component_lists()) {
        InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }
    for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
      for (const auto &L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }
    for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
      for (const auto &L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;

    for (const auto *C :
         CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
      for (const auto &L : C->component_lists()) {
        assert(!L.second.empty() && "Not expecting empty list of components!");
        const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = L.second.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto CI = std::find_if(
              It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
                return MI.Components.back().getAssociatedDeclaration() == VD;
              });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          if (CI != It->second.end()) {
            CI->ReturnDevicePointer = true;
            continue;
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
                  /*ReturnDevicePointer=*/false, C->isImplicit());
          DeferredInfo[nullptr].emplace_back(IE, VD);
        } else {
          // Not a struct member: emit the pointer value directly as a
          // zero-size RETURN_PARAM entry.
          llvm::Value *Ptr =
              CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
          BasePointers.emplace_back(Ptr, VD);
          Pointers.push_back(Ptr);
          Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
          Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
        }
      }
    }

    for (const auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = true;

      // Temporary versions of arrays
      MapBaseValuesArrayTy CurBasePointers;
      MapValuesArrayTy CurPointers;
      MapValuesArrayTy CurSizes;
      MapFlagsArrayTy CurTypes;
      StructRangeInfoTy PartialStruct;

      for (const MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");

        // Remember the current base pointer index.
        unsigned CurrentBasePointersIdx = CurBasePointers.size();
        generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
                                     CurBasePointers, CurPointers, CurSizes,
                                     CurTypes, PartialStruct,
                                     IsFirstComponentList, L.IsImplicit);

        // If this entry relates with a device pointer, set the relevant
        // declaration and add the 'return pointer' flag.
        if (L.ReturnDevicePointer) {
          assert(CurBasePointers.size() > CurrentBasePointersIdx &&
                 "Unexpected number of mapped base pointers.");

          const ValueDecl *RelevantVD =
              L.Components.back().getAssociatedDeclaration();
          assert(RelevantVD &&
                 "No relevant declaration related with device pointer??");

          CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
          CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
        }
        IsFirstComponentList = false;
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr.
      auto CI = DeferredInfo.find(M.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer();
          llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
              this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
          CurBasePointers.emplace_back(BasePtr, L.VD);
          CurPointers.push_back(Ptr);
          CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
          // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
          // value MEMBER_OF=FFFF so that the entry is later updated with the
          // correct value of MEMBER_OF.
          CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                             OMP_MAP_MEMBER_OF);
        }
      }

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
                          PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      Types.append(CurTypes.begin(), CurTypes.end());
    }
  }
8136 
8137   /// Generate all the base pointers, section pointers, sizes and map types for
8138   /// the extracted map clauses of user-defined mapper.
8139   void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers,
8140                                 MapValuesArrayTy &Pointers,
8141                                 MapValuesArrayTy &Sizes,
8142                                 MapFlagsArrayTy &Types) const {
8143     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8144            "Expect a declare mapper directive");
8145     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8146     // We have to process the component lists that relate with the same
8147     // declaration in a single chunk so that we can generate the map flags
8148     // correctly. Therefore, we organize all lists in a map.
8149     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
8150 
8151     // Helper function to fill the information map for the different supported
8152     // clauses.
8153     auto &&InfoGen = [&Info](
8154         const ValueDecl *D,
8155         OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8156         OpenMPMapClauseKind MapType,
8157         ArrayRef<OpenMPMapModifierKind> MapModifiers,
8158         bool ReturnDevicePointer, bool IsImplicit) {
8159       const ValueDecl *VD =
8160           D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
8161       Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
8162                             IsImplicit);
8163     };
8164 
8165     for (const auto *C : CurMapperDir->clauselists()) {
8166       const auto *MC = cast<OMPMapClause>(C);
8167       for (const auto &L : MC->component_lists()) {
8168         InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(),
8169                 /*ReturnDevicePointer=*/false, MC->isImplicit());
8170       }
8171     }
8172 
8173     for (const auto &M : Info) {
8174       // We need to know when we generate information for the first component
8175       // associated with a capture, because the mapping flags depend on it.
8176       bool IsFirstComponentList = true;
8177 
8178       // Temporary versions of arrays
8179       MapBaseValuesArrayTy CurBasePointers;
8180       MapValuesArrayTy CurPointers;
8181       MapValuesArrayTy CurSizes;
8182       MapFlagsArrayTy CurTypes;
8183       StructRangeInfoTy PartialStruct;
8184 
8185       for (const MapInfo &L : M.second) {
8186         assert(!L.Components.empty() &&
8187                "Not expecting declaration with no component lists.");
8188         generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
8189                                      CurBasePointers, CurPointers, CurSizes,
8190                                      CurTypes, PartialStruct,
8191                                      IsFirstComponentList, L.IsImplicit);
8192         IsFirstComponentList = false;
8193       }
8194 
8195       // If there is an entry in PartialStruct it means we have a struct with
8196       // individual members mapped. Emit an extra combined entry.
8197       if (PartialStruct.Base.isValid())
8198         emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8199                           PartialStruct);
8200 
8201       // We need to append the results of this capture to what we already have.
8202       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8203       Pointers.append(CurPointers.begin(), CurPointers.end());
8204       Sizes.append(CurSizes.begin(), CurSizes.end());
8205       Types.append(CurTypes.begin(), CurTypes.end());
8206     }
8207   }
8208 
8209   /// Emit capture info for lambdas for variables captured by reference.
8210   void generateInfoForLambdaCaptures(
8211       const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
8212       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
8213       MapFlagsArrayTy &Types,
8214       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8215     const auto *RD = VD->getType()
8216                          .getCanonicalType()
8217                          .getNonReferenceType()
8218                          ->getAsCXXRecordDecl();
8219     if (!RD || !RD->isLambda())
8220       return;
8221     Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
8222     LValue VDLVal = CGF.MakeAddrLValue(
8223         VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
8224     llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
8225     FieldDecl *ThisCapture = nullptr;
8226     RD->getCaptureFields(Captures, ThisCapture);
8227     if (ThisCapture) {
8228       LValue ThisLVal =
8229           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8230       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8231       LambdaPointers.try_emplace(ThisLVal.getPointer(), VDLVal.getPointer());
8232       BasePointers.push_back(ThisLVal.getPointer());
8233       Pointers.push_back(ThisLValVal.getPointer());
8234       Sizes.push_back(
8235           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8236                                     CGF.Int64Ty, /*isSigned=*/true));
8237       Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8238                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8239     }
8240     for (const LambdaCapture &LC : RD->captures()) {
8241       if (!LC.capturesVariable())
8242         continue;
8243       const VarDecl *VD = LC.getCapturedVar();
8244       if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8245         continue;
8246       auto It = Captures.find(VD);
8247       assert(It != Captures.end() && "Found lambda capture without field.");
8248       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8249       if (LC.getCaptureKind() == LCK_ByRef) {
8250         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8251         LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer());
8252         BasePointers.push_back(VarLVal.getPointer());
8253         Pointers.push_back(VarLValVal.getPointer());
8254         Sizes.push_back(CGF.Builder.CreateIntCast(
8255             CGF.getTypeSize(
8256                 VD->getType().getCanonicalType().getNonReferenceType()),
8257             CGF.Int64Ty, /*isSigned=*/true));
8258       } else {
8259         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8260         LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer());
8261         BasePointers.push_back(VarLVal.getPointer());
8262         Pointers.push_back(VarRVal.getScalarVal());
8263         Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8264       }
8265       Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8266                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8267     }
8268   }
8269 
8270   /// Set correct indices for lambdas captures.
8271   void adjustMemberOfForLambdaCaptures(
8272       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8273       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8274       MapFlagsArrayTy &Types) const {
8275     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8276       // Set correct member_of idx for all implicit lambda captures.
8277       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8278                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8279         continue;
8280       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8281       assert(BasePtr && "Unable to find base lambda address.");
8282       int TgtIdx = -1;
8283       for (unsigned J = I; J > 0; --J) {
8284         unsigned Idx = J - 1;
8285         if (Pointers[Idx] != BasePtr)
8286           continue;
8287         TgtIdx = Idx;
8288         break;
8289       }
8290       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8291       // All other current entries will be MEMBER_OF the combined entry
8292       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8293       // 0xFFFF in the MEMBER_OF field).
8294       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8295       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8296     }
8297   }
8298 
  /// Generate the base pointers, section pointers, sizes and map types
  /// associated to a given capture.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg,
                              MapBaseValuesArrayTy &BasePointers,
                              MapValuesArrayTy &Pointers,
                              MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // A 'this' capture is represented by a null declaration; otherwise use
    // the canonical declaration as the lookup key.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      BasePointers.emplace_back(Arg, VD);
      Pointers.push_back(Arg);
      Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
      return;
    }

    // (components, map type, map modifiers, is-implicit) for one map clause
    // entry referring to this declaration.
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    // Gather every map-clause component list that refers to this declaration.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      for (const auto &L : C->decl_component_lists(VD)) {
        assert(L.first == VD &&
               "We got information for the wrong declaration??");
        assert(!L.second.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(L.second, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit());
      }
    }

    // Find overlapping elements (including the offset from the base element).
    // For each base list, collect the component lists that map a sub-element
    // of it.
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ++Count;
      // Compare L against every later list, walking both component lists from
      // the base element outwards (hence rbegin/rend).
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        // NOTE(review): MapType/MapModifiers/IsImplicit are reused here as
        // scratch for the tie; their values are not read again before being
        // reassigned on the next outer iteration or in the loops below.
        std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        // Advance while both lists describe the same expression/declaration.
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head of
        // the components list. The exhausted (shorter) list is the base; the
        // longer one maps a sub-element of it.
        if (CI == CE || SI == SE) {
          assert((CI != CE || SI != SE) &&
                 "Unexpected full match of the mapping components.");
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item. The field order of the
    // record (including fields of bases, flattened by getPlainLayout) breaks
    // ties between fields of different parents.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      if (const auto *CRD =
              VD->getType().getCanonicalType()->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            // Walk both lists from the base element to the first divergence.
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // Diverging fields: order by field index within the same parent,
            // otherwise by which field appears first in the flattened layout.
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      bool IsFirstComponentList = true;
      generateInfoForComponentList(MapType, MapModifiers, Components,
                                   BasePointers, Pointers, Sizes, Types,
                                   PartialStruct, IsFirstComponentList,
                                   IsImplicit, OverlappedComponents);
    }
    // Go through other elements without overlapped elements.
    bool IsFirstComponentList = OverlappedData.empty();
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, Components,
                                     BasePointers, Pointers, Sizes, Types,
                                     PartialStruct, IsFirstComponentList,
                                     IsImplicit);
      IsFirstComponentList = false;
    }
  }
8476 
8477   /// Generate the base pointers, section pointers, sizes and map types
8478   /// associated with the declare target link variables.
8479   void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
8480                                         MapValuesArrayTy &Pointers,
8481                                         MapValuesArrayTy &Sizes,
8482                                         MapFlagsArrayTy &Types) const {
8483     assert(CurDir.is<const OMPExecutableDirective *>() &&
8484            "Expect a executable directive");
8485     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8486     // Map other list items in the map clause which are not captured variables
8487     // but "declare target link" global variables.
8488     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8489       for (const auto &L : C->component_lists()) {
8490         if (!L.first)
8491           continue;
8492         const auto *VD = dyn_cast<VarDecl>(L.first);
8493         if (!VD)
8494           continue;
8495         llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
8496             OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
8497         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8498             !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
8499           continue;
8500         StructRangeInfoTy PartialStruct;
8501         generateInfoForComponentList(
8502             C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
8503             Pointers, Sizes, Types, PartialStruct,
8504             /*IsFirstComponentList=*/true, C->isImplicit());
8505         assert(!PartialStruct.Base.isValid() &&
8506                "No partial structs for declare target link expected.");
8507       }
8508     }
8509   }
8510 
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapBaseValuesArrayTy &CurBasePointers,
                              MapValuesArrayTy &CurPointers,
                              MapValuesArrayTy &CurSizes,
                              MapFlagsArrayTy &CurMapTypes) const {
    // Default maps are implicit unless the capture has an entry in
    // FirstPrivateDecls, in which case the recorded flag is used instead.
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // 'this' is captured as a pointer; the size is that of the pointee
      // object, and the default map type is 'tofrom'.
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CurSizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CurMapTypes.push_back(OMP_MAP_LITERAL);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CurMapTypes.push_back(OMP_MAP_NONE);
        CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      // Capture by reference: map the referenced object with its own size.
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CurSizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end() &&
          VD->getType().isConstant(CGF.getContext())) {
        // A constant firstprivate variable gets a registered global copy that
        // is initialized from the current value via memcpy and then used as
        // both base pointer and pointer.
        llvm::Constant *Addr =
            CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
        // Copy the value of the original variable to the new global copy.
        CGF.Builder.CreateMemCpy(
            CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(),
            Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
            CurSizes.back(), /*IsVolatile=*/false);
        // Use new global variable as the base pointers.
        CurBasePointers.push_back(Addr);
        CurPointers.push_back(Addr);
      } else {
        CurBasePointers.push_back(CV);
        if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
          // Firstprivate pointer: pass the loaded pointer value rather than
          // the reference address itself.
          Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
              CV, ElementType, CGF.getContext().getDeclAlign(VD),
              AlignmentSource::Decl));
          CurPointers.push_back(PtrAddr.getPointer());
        } else {
          CurPointers.push_back(CV);
        }
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CurMapTypes.back() |= OMP_MAP_IMPLICIT;
  }
8594 };
8595 } // anonymous namespace
8596 
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
///
/// Three kinds of arrays are produced per target region:
///   - base pointers / pointers: always stack-allocated and filled at run
///     time, since the captured addresses are only known then;
///   - sizes: a private constant global if every size is a compile-time
///     constant, otherwise a stack array filled at run time;
///   - map types: always a private constant global (flags are compile-time).
/// The resulting addresses are published through \p Info.
static void
emitOffloadingArrays(CodeGenFunction &CGF,
                     MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
                     MappableExprsHandler::MapValuesArrayTy &Pointers,
                     MappableExprsHandler::MapValuesArrayTy &Sizes,
                     MappableExprsHandler::MapFlagsArrayTy &MapTypes,
                     CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType =
        Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0);

    // Base pointers and pointers always need per-region stack storage; their
    // values are only known at run time.
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      // At least one size is only known at run time: allocate a stack array
      // and fill it in the store loop below.
      QualType SizeArrayType =
          Ctx.getConstantArrayType(Int64Ty, PointerNumAP, ArrayType::Normal,
                                   /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (llvm::Value *S : Sizes)
        ConstSizes.push_back(cast<llvm::Constant>(S));

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      // unnamed_addr lets the linker/backend merge identical size arrays.
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
    llvm::copy(MapTypes, Mapping.begin());
    llvm::Constant *MapTypesArrayInit =
        llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
        CGM.getModule(), MapTypesArrayInit->getType(),
        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
        MapTypesArrayInit, MaptypesName);
    MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
    Info.MapTypesArray = MapTypesArrayGbl;

    // Fill the run-time arrays: store each base pointer, pointer and (when
    // not constant) size into its slot.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      // The slot is i8**; cast it to a pointer to the value's own type before
      // storing.
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Record where the capture for a device pointer was stored so the
      // region body can later load the translated device address from it.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(
            CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true),
            SAddr);
      }
    }
  }
}
8715 
8716 /// Emit the arguments to be passed to the runtime library based on the
8717 /// arrays of pointers, sizes and map types.
8718 static void emitOffloadingArraysArgument(
8719     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
8720     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
8721     llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
8722   CodeGenModule &CGM = CGF.CGM;
8723   if (Info.NumberOfPtrs) {
8724     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8725         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8726         Info.BasePointersArray,
8727         /*Idx0=*/0, /*Idx1=*/0);
8728     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8729         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8730         Info.PointersArray,
8731         /*Idx0=*/0,
8732         /*Idx1=*/0);
8733     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8734         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
8735         /*Idx0=*/0, /*Idx1=*/0);
8736     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8737         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8738         Info.MapTypesArray,
8739         /*Idx0=*/0,
8740         /*Idx1=*/0);
8741   } else {
8742     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8743     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8744     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8745     MapTypesArrayArg =
8746         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8747   }
8748 }
8749 
8750 /// Check for inner distribute directive.
8751 static const OMPExecutableDirective *
8752 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
8753   const auto *CS = D.getInnermostCapturedStmt();
8754   const auto *Body =
8755       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
8756   const Stmt *ChildStmt =
8757       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8758 
8759   if (const auto *NestedDir =
8760           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8761     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
8762     switch (D.getDirectiveKind()) {
8763     case OMPD_target:
8764       if (isOpenMPDistributeDirective(DKind))
8765         return NestedDir;
8766       if (DKind == OMPD_teams) {
8767         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
8768             /*IgnoreCaptured=*/true);
8769         if (!Body)
8770           return nullptr;
8771         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8772         if (const auto *NND =
8773                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8774           DKind = NND->getDirectiveKind();
8775           if (isOpenMPDistributeDirective(DKind))
8776             return NND;
8777         }
8778       }
8779       return nullptr;
8780     case OMPD_target_teams:
8781       if (isOpenMPDistributeDirective(DKind))
8782         return NestedDir;
8783       return nullptr;
8784     case OMPD_target_parallel:
8785     case OMPD_target_simd:
8786     case OMPD_target_parallel_for:
8787     case OMPD_target_parallel_for_simd:
8788       return nullptr;
8789     case OMPD_target_teams_distribute:
8790     case OMPD_target_teams_distribute_simd:
8791     case OMPD_target_teams_distribute_parallel_for:
8792     case OMPD_target_teams_distribute_parallel_for_simd:
8793     case OMPD_parallel:
8794     case OMPD_for:
8795     case OMPD_parallel_for:
8796     case OMPD_parallel_sections:
8797     case OMPD_for_simd:
8798     case OMPD_parallel_for_simd:
8799     case OMPD_cancel:
8800     case OMPD_cancellation_point:
8801     case OMPD_ordered:
8802     case OMPD_threadprivate:
8803     case OMPD_allocate:
8804     case OMPD_task:
8805     case OMPD_simd:
8806     case OMPD_sections:
8807     case OMPD_section:
8808     case OMPD_single:
8809     case OMPD_master:
8810     case OMPD_critical:
8811     case OMPD_taskyield:
8812     case OMPD_barrier:
8813     case OMPD_taskwait:
8814     case OMPD_taskgroup:
8815     case OMPD_atomic:
8816     case OMPD_flush:
8817     case OMPD_teams:
8818     case OMPD_target_data:
8819     case OMPD_target_exit_data:
8820     case OMPD_target_enter_data:
8821     case OMPD_distribute:
8822     case OMPD_distribute_simd:
8823     case OMPD_distribute_parallel_for:
8824     case OMPD_distribute_parallel_for_simd:
8825     case OMPD_teams_distribute:
8826     case OMPD_teams_distribute_simd:
8827     case OMPD_teams_distribute_parallel_for:
8828     case OMPD_teams_distribute_parallel_for_simd:
8829     case OMPD_target_update:
8830     case OMPD_declare_simd:
8831     case OMPD_declare_variant:
8832     case OMPD_declare_target:
8833     case OMPD_end_declare_target:
8834     case OMPD_declare_reduction:
8835     case OMPD_declare_mapper:
8836     case OMPD_taskloop:
8837     case OMPD_taskloop_simd:
8838     case OMPD_requires:
8839     case OMPD_unknown:
8840       llvm_unreachable("Unexpected directive.");
8841     }
8842   }
8843 
8844   return nullptr;
8845 }
8846 
/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type) {
///   // Allocate space for an array section first.
///   if (size > 1 && !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFrom(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFrom(type));
/// }
/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Each mapper declaration is emitted at most once; later calls reuse the
  // cached function from UDMMap.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  // The mapper walks elements of type Ty through a restrict-qualified pointer.
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes. The signature matches
  // the example in the documentation comment above:
  //   (handle, base, begin, size, type).
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // The function name encodes the mangled mapped type and the mapper's name.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // Allow the mapper body to be optimized even when the TU is built at -O0.
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  // NOTE(review): these EmitLoadOfScalar calls pass C.getPointerType(...) as
  // the l-value's QualType although the loaded values are Int64Ty/VoidPtrTy
  // scalars — confirm this matches EmitLoadOfScalar's type expectations.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
      CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  // PtrPHI tracks the current element; the back-edge incoming value is added
  // after the loop body has been emitted (see PtrNext below).
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
    return MapperCGF
        .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
  MappableExprsHandler::MapValuesArrayTy Pointers;
  MappableExprsHandler::MapValuesArrayTy Sizes;
  MappableExprsHandler::MapFlagsArrayTy MapTypes;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__tgt_mapper_num_components), OffloadingArgs);
  // Shift the count into the MEMBER_OF bit-field position so it can be added
  // to each component's map type below.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Sizes[I];

    // Extract the MEMBER_OF field from the map type.
    llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
    MapperCGF.EmitBlock(MemberBB);
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]);
    llvm::Value *Member = MapperCGF.Builder.CreateAnd(
        OriMapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
    llvm::BasicBlock *MemberCombineBB =
        MapperCGF.createBasicBlock("omp.member.combine");
    llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
    llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
    MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
    // Add the number of pre-existing components to the MEMBER_OF field if it
    // is valid.
    MapperCGF.EmitBlock(MemberCombineBB);
    llvm::Value *CombinedMember =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
    // Do nothing if it is not a member of previous components.
    MapperCGF.EmitBlock(TypeBB);
    // The "4" is only a capacity hint; this PHI has two incoming values.
    llvm::PHINode *MemberMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
    MemberMapType->addIncoming(OriMapType, MemberBB);
    MemberMapType->addIncoming(CombinedMember, MemberCombineBB);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    // The edge from ToElseBB reaches EndBB directly for the tofrom case, in
    // which the map type is kept unchanged.
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    // TODO: call the corresponding mapper function if a user-defined mapper is
    // associated with this map clause.
    // Call the runtime API __tgt_push_mapper_component to fill up the runtime
    // data structure.
    llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType};
    MapperCGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__tgt_push_mapper_component),
        OffloadingArgs);
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, BodyBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Cache the emitted function; also remember it per-function so the caller
  // can track which mappers were requested while emitting CGF->CurFn.
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
9124 
9125 /// Emit the array initialization or deletion portion for user-defined mapper
9126 /// code generation. First, it evaluates whether an array section is mapped and
9127 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9128 /// true, and \a MapType indicates to not delete this array, array
9129 /// initialization code is generated. If \a IsInit is false, and \a MapType
9130 /// indicates to not this array, array deletion code is generated.
9131 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9132     CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9133     llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9134     CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
9135   StringRef Prefix = IsInit ? ".init" : ".del";
9136 
9137   // Evaluate if this is an array section.
9138   llvm::BasicBlock *IsDeleteBB =
9139       MapperCGF.createBasicBlock("omp.array" + Prefix + ".evaldelete");
9140   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.array" + Prefix);
9141   llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
9142       Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9143   MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);
9144 
9145   // Evaluate if we are going to delete this section.
9146   MapperCGF.EmitBlock(IsDeleteBB);
9147   llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9148       MapType,
9149       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
9150   llvm::Value *DeleteCond;
9151   if (IsInit) {
9152     DeleteCond = MapperCGF.Builder.CreateIsNull(
9153         DeleteBit, "omp.array" + Prefix + ".delete");
9154   } else {
9155     DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9156         DeleteBit, "omp.array" + Prefix + ".delete");
9157   }
9158   MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);
9159 
9160   MapperCGF.EmitBlock(BodyBB);
9161   // Get the array size by multiplying element size and element number (i.e., \p
9162   // Size).
9163   llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9164       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9165   // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
9166   // memory allocation/deletion purpose only.
9167   llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
9168       MapType,
9169       MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9170                                    MappableExprsHandler::OMP_MAP_FROM)));
9171   // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9172   // data structure.
9173   llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
9174   MapperCGF.EmitRuntimeCall(
9175       createRuntimeFunction(OMPRTL__tgt_push_mapper_component), OffloadingArgs);
9176 }
9177 
9178 void CGOpenMPRuntime::emitTargetNumIterationsCall(
9179     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device,
9180     const llvm::function_ref<llvm::Value *(
9181         CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter) {
9182   OpenMPDirectiveKind Kind = D.getDirectiveKind();
9183   const OMPExecutableDirective *TD = &D;
9184   // Get nested teams distribute kind directive, if any.
9185   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
9186     TD = getNestedDistributeDirective(CGM.getContext(), D);
9187   if (!TD)
9188     return;
9189   const auto *LD = cast<OMPLoopDirective>(TD);
9190   auto &&CodeGen = [LD, &Device, &SizeEmitter, this](CodeGenFunction &CGF,
9191                                                      PrePostActionTy &) {
9192     llvm::Value *NumIterations = SizeEmitter(CGF, *LD);
9193 
9194     // Emit device ID if any.
9195     llvm::Value *DeviceID;
9196     if (Device)
9197       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
9198                                            CGF.Int64Ty, /*isSigned=*/true);
9199     else
9200       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9201 
9202     llvm::Value *Args[] = {DeviceID, NumIterations};
9203     CGF.EmitRuntimeCall(
9204         createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
9205   };
9206   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
9207 }
9208 
/// Emit the offloading launch sequence for a target directive \p D: build the
/// map arrays for all captured variables, call the appropriate __tgt_target*
/// entry point, and fall back to the host version (\p OutlinedFn) when no
/// device binary is available or the device launch fails. \p OutlinedFnID may
/// be null when no target triples were specified; \p IfCond / \p Device are
/// the optional 'if' and 'device' clause expressions.
void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     llvm::Function *OutlinedFn,
                                     llvm::Value *OutlinedFnID,
                                     const Expr *IfCond, const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // A 'depend' clause requires wrapping the launch in an outer task so the
  // dependences can be honored by the runtime.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  // Materialize the captured variables once, up front; they are reused by
  // both the device-launch and host-fallback paths below.
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // InputInfo/MapTypesArray are filled in by TargetThenGen and read by
  // ThenGen, which both capture them by reference.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask,
                    &CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) {
    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
                                          : OMPRTL__tgt_target_teams),
          OffloadingArgs);
    } else {
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
                                          : OMPRTL__tgt_target),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    // A non-zero return value from the __tgt_target* call means the device
    // launch failed and the host fallback must run.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      // Inside the outer task the captures must be re-materialized in the
      // task's own frame before calling the host version.
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Builds the offloading arrays (base pointers, pointers, sizes, map types)
  // for every capture, then delegates to ThenGen for the actual launch.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;

    // Walk captures, record fields, and captured values in lock step.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
      MappableExprsHandler::MapValuesArrayTy CurPointers;
      MappableExprsHandler::MapValuesArrayTy CurSizes;
      MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurBasePointers.push_back(*CV);
        CurPointers.push_back(*CV);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                              MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                              MappableExprsHandler::OMP_MAP_IMPLICIT);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
                                         CurSizes, CurMapTypes, PartialStruct);
        if (CurBasePointers.empty())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
                                           CurPointers, CurSizes, CurMapTypes);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(
              CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
              CurMapTypes, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert(!CurBasePointers.empty() &&
             "Non-existing map pointer for capture!");
      assert(CurBasePointers.size() == CurPointers.size() &&
             CurBasePointers.size() == CurSizes.size() &&
             CurBasePointers.size() == CurMapTypes.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
                                    CurMapTypes, PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
                                              Pointers, MapTypes);
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
                                               MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Publish the array addresses to the enclosing-scope InputInfo and
    // MapTypesArray so ThenGen (captured by reference) can read them.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
9481 
/// Recursively walk the statement tree rooted at \p S and emit a device
/// function for every target execution directive encountered. \p ParentName
/// is the mangled name of the enclosing host function, used to build unique
/// kernel names.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the device-function emitter for each combined/standalone
    // target directive kind.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // All remaining directive kinds are listed explicitly (no default:) so
    // that adding a new kind produces a -Wswitch warning here.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // A non-target executable directive: recurse into its associated statement
  // (nested target regions may appear inside it).
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(
        E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
9620 
9621 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9622   // If emitting code for the host, we do not process FD here. Instead we do
9623   // the normal code generation.
9624   if (!CGM.getLangOpts().OpenMPIsDevice) {
9625     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
9626       Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9627           OMPDeclareTargetDeclAttr::getDeviceType(FD);
9628       // Do not emit device_type(nohost) functions for the host.
9629       if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9630         return true;
9631     }
9632     return false;
9633   }
9634 
9635   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9636   StringRef Name = CGM.getMangledName(GD);
9637   // Try to detect target regions in the function.
9638   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
9639     scanForTargetRegionsFunctions(FD->getBody(), Name);
9640     Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9641         OMPDeclareTargetDeclAttr::getDeviceType(FD);
9642     // Do not emit device_type(nohost) functions for the host.
9643     if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9644       return true;
9645   }
9646 
9647   // Do not to emit function if it is not marked as declare target.
9648   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9649          AlreadyEmittedTargetFunctions.count(Name) == 0;
9650 }
9651 
9652 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9653   if (!CGM.getLangOpts().OpenMPIsDevice)
9654     return false;
9655 
9656   // Check if there are Ctors/Dtors in this declaration and look for target
9657   // regions in it. We use the complete variant to produce the kernel name
9658   // mangling.
9659   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9660   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9661     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9662       StringRef ParentName =
9663           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9664       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9665     }
9666     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9667       StringRef ParentName =
9668           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9669       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9670     }
9671   }
9672 
9673   // Do not to emit variable if it is not marked as declare target.
9674   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9675       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9676           cast<VarDecl>(GD.getDecl()));
9677   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9678       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9679        HasRequiresUnifiedSharedMemory)) {
9680     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9681     return true;
9682   }
9683   return false;
9684 }
9685 
9686 llvm::Constant *
9687 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
9688                                                 const VarDecl *VD) {
9689   assert(VD->getType().isConstant(CGM.getContext()) &&
9690          "Expected constant variable.");
9691   StringRef VarName;
9692   llvm::Constant *Addr;
9693   llvm::GlobalValue::LinkageTypes Linkage;
9694   QualType Ty = VD->getType();
9695   SmallString<128> Buffer;
9696   {
9697     unsigned DeviceID;
9698     unsigned FileID;
9699     unsigned Line;
9700     getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
9701                              FileID, Line);
9702     llvm::raw_svector_ostream OS(Buffer);
9703     OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
9704        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
9705     VarName = OS.str();
9706   }
9707   Linkage = llvm::GlobalValue::InternalLinkage;
9708   Addr =
9709       getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
9710                                   getDefaultFirstprivateAddressSpace());
9711   cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
9712   CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
9713   CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
9714   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9715       VarName, Addr, VarSize,
9716       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
9717   return Addr;
9718 }
9719 
/// Register the global variable \p VD (with address \p Addr) with the
/// offload-entries manager according to its declare target map type
/// ('to' or 'link') and the unified-shared-memory requirement.
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables.
  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  CharUnits VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  // Plain 'to' without unified shared memory: register the variable itself.
  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
      !HasRequiresUnifiedSharedMemory) {
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
      assert(!VarSize.isZero() && "Expected non-zero size of the variable");
    } else {
      // Declaration only: size 0 tells the runtime no storage is defined here.
      VarSize = CharUnits::Zero();
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temp solution to prevent optimizations of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      // Emit an internal constant "<name>_ref" global initialized with the
      // variable's address so the optimizer keeps the variable alive.
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        llvm::Constant *AddrRef =
            getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    // 'link', or 'to' with unified shared memory: register the pointer-sized
    // indirection variable instead of the variable itself.
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must link or to with unified memory.");
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
    else
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;

    if (CGM.getLangOpts().OpenMPIsDevice) {
      // On the device the entry carries only the name; the address is
      // resolved by the runtime.
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    VarSize = CGM.getPointerSize();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}
9787 
9788 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
9789   if (isa<FunctionDecl>(GD.getDecl()) ||
9790       isa<OMPDeclareReductionDecl>(GD.getDecl()))
9791     return emitTargetFunctions(GD);
9792 
9793   return emitTargetGlobalVariable(GD);
9794 }
9795 
9796 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
9797   for (const VarDecl *VD : DeferredGlobalVariables) {
9798     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9799         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9800     if (!Res)
9801       continue;
9802     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9803         !HasRequiresUnifiedSharedMemory) {
9804       CGM.EmitGlobal(VD);
9805     } else {
9806       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
9807               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9808                HasRequiresUnifiedSharedMemory)) &&
9809              "Expected link clause or to clause with unified memory.");
9810       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
9811     }
9812   }
9813 }
9814 
9815 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
9816     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
9817   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
9818          " Expected target-based directive.");
9819 }
9820 
9821 void CGOpenMPRuntime::checkArchForUnifiedAddressing(
9822     const OMPRequiresDecl *D) {
9823   for (const OMPClause *Clause : D->clauselists()) {
9824     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
9825       HasRequiresUnifiedSharedMemory = true;
9826       break;
9827     }
9828   }
9829 }
9830 
9831 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
9832                                                        LangAS &AS) {
9833   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
9834     return false;
9835   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
9836   switch(A->getAllocatorType()) {
9837   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
9838   // Not supported, fallback to the default mem space.
9839   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
9840   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
9841   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
9842   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
9843   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
9844   case OMPAllocateDeclAttr::OMPConstMemAlloc:
9845   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
9846     AS = LangAS::Default;
9847     return true;
9848   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
9849     llvm_unreachable("Expected predefined allocator for the variables with the "
9850                      "static storage.");
9851   }
9852   return false;
9853 }
9854 
/// Returns true if a 'requires unified_shared_memory' directive was seen
/// (recorded by checkArchForUnifiedAddressing).
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
9858 
9859 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
9860     CodeGenModule &CGM)
9861     : CGM(CGM) {
9862   if (CGM.getLangOpts().OpenMPIsDevice) {
9863     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
9864     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
9865   }
9866 }
9867 
/// RAII destructor: restore the ShouldMarkAsGlobal flag saved by the
/// constructor (device compilation only, matching the constructor's guard).
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}
9872 
/// Decide whether the function \p GD may be emitted with external (global)
/// visibility in device code. Returns true when it must NOT be re-marked
/// (host compilation, marking disabled, or already emitted); false when the
/// caller should mark it. Note: the final insert() also records the function
/// as emitted as a side effect.
bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  StringRef Name = CGM.getMangledName(GD);
  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not to emit function if it is marked as declare target as it was already
  // emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetFunctions.count(Name) == 0) {
      // Emitted only if a definition already exists in the module.
      if (auto *F = dyn_cast_or_null<llvm::Function>(CGM.GetGlobalValue(Name)))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  // Record the function as emitted; a second call with the same name
  // returns true (insert fails) so it is not marked twice.
  return !AlreadyEmittedTargetFunctions.insert(Name).second;
}
9892 
/// Create the constructor-like function that calls
/// __tgt_register_requires(flags) so the runtime can check 'requires'
/// clauses for consistency across translation units. Returns nullptr when
/// no registration is needed (device compilation, simd-only mode, no target
/// triples, or no target regions/entries in this TU).
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    // CodeGenFunction is scoped so StartFunction/FinishFunction bracket the
    // emitted body.
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires),
        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}
9933 
/// Emits the offload entries/metadata for this module and returns the
/// function that registers the offloading binary descriptor (the result of
/// createOffloadingBinaryDescriptorRegistration).
llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
  // If we have offloading in the current module, we need to emit the entries
  // now and register the offloading descriptor.
  createOffloadEntriesAndInfoMetadata();

  // Create and register the offloading binary descriptors. This is the main
  // entity that captures all the information about offloading in the current
  // compilation unit.
  return createOffloadingBinaryDescriptorRegistration();
}
9944 
9945 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
9946                                     const OMPExecutableDirective &D,
9947                                     SourceLocation Loc,
9948                                     llvm::Function *OutlinedFn,
9949                                     ArrayRef<llvm::Value *> CapturedVars) {
9950   if (!CGF.HaveInsertPoint())
9951     return;
9952 
9953   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9954   CodeGenFunction::RunCleanupsScope Scope(CGF);
9955 
9956   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
9957   llvm::Value *Args[] = {
9958       RTLoc,
9959       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
9960       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
9961   llvm::SmallVector<llvm::Value *, 16> RealArgs;
9962   RealArgs.append(std::begin(Args), std::end(Args));
9963   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
9964 
9965   llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
9966   CGF.EmitRuntimeCall(RTLFn, RealArgs);
9967 }
9968 
9969 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
9970                                          const Expr *NumTeams,
9971                                          const Expr *ThreadLimit,
9972                                          SourceLocation Loc) {
9973   if (!CGF.HaveInsertPoint())
9974     return;
9975 
9976   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9977 
9978   llvm::Value *NumTeamsVal =
9979       NumTeams
9980           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
9981                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
9982           : CGF.Builder.getInt32(0);
9983 
9984   llvm::Value *ThreadLimitVal =
9985       ThreadLimit
9986           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
9987                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
9988           : CGF.Builder.getInt32(0);
9989 
9990   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
9991   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
9992                                      ThreadLimitVal};
9993   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
9994                       PushNumTeamsArgs);
9995 }
9996 
/// Emits the __tgt_target_data_begin/__tgt_target_data_end bracket for a
/// 'target data' region, honoring an optional 'if' clause and 'device'
/// clause, and emits the region body (duplicated when device pointer
/// privatization forces a privatized and a non-privatized variant).
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MCHandler(D, CGF);
    MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin),
                        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end),
                        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  // Open the data environment (conditionally, if an 'if' clause is present).
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  // Close the data environment (again guarded by the 'if' clause, if any).
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
10123 
/// Emits the runtime call for a standalone target data directive ('target
/// enter data', 'target exit data', or 'target update'), selecting the
/// (nowait) runtime entry point from the directive kind and honoring optional
/// 'if', 'device', and 'depend' clauses.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo,
                    &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // The offloading arrays are filled in by TargetThenGen below before this
    // lambda runs.
    llvm::Value *OffloadingArgs[] = {DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray};

    // Select the right runtime function call for each expected standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    OpenMPRTLFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
                        : OMPRTL__tgt_target_data_begin;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
                        : OMPRTL__tgt_target_data_end;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
                        : OMPRTL__tgt_target_data_update;
      break;
    // Any other directive kind is invalid here (see the assertion at the top
    // of the function); the cases are spelled out so adding a new directive
    // kind triggers a -Wswitch warning.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Publish the arrays to ThenGen through the captured-by-reference
    // InputInfo/MapTypesArray.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (D.hasClausesOfKind<OMPDependClause>())
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, TargetThenGen,
                    [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
10277 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    /// Classification of the parameter; defaults to Vector.
    ParamKindTy Kind = Vector;
    /// Stride/argument value from the linear clause (interpretation depends
    /// on Kind); zero means "not provided" — the manglers skip it.
    llvm::APSInt StrideOrArg;
    /// Alignment of the parameter; zero means "not provided".
    llvm::APSInt Alignment;
  };
} // namespace
10288 
10289 static unsigned evaluateCDTSize(const FunctionDecl *FD,
10290                                 ArrayRef<ParamAttrTy> ParamAttrs) {
10291   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10292   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10293   // of that clause. The VLEN value must be power of 2.
10294   // In other case the notion of the function`s "characteristic data type" (CDT)
10295   // is used to compute the vector length.
10296   // CDT is defined in the following order:
10297   //   a) For non-void function, the CDT is the return type.
10298   //   b) If the function has any non-uniform, non-linear parameters, then the
10299   //   CDT is the type of the first such parameter.
10300   //   c) If the CDT determined by a) or b) above is struct, union, or class
10301   //   type which is pass-by-value (except for the type that maps to the
10302   //   built-in complex data type), the characteristic data type is int.
10303   //   d) If none of the above three cases is applicable, the CDT is int.
10304   // The VLEN is then determined based on the CDT and the size of vector
10305   // register of that ISA for which current vector version is generated. The
10306   // VLEN is computed using the formula below:
10307   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
10308   // where vector register size specified in section 3.2.1 Registers and the
10309   // Stack Frame of original AMD64 ABI document.
10310   QualType RetType = FD->getReturnType();
10311   if (RetType.isNull())
10312     return 0;
10313   ASTContext &C = FD->getASTContext();
10314   QualType CDT;
10315   if (!RetType.isNull() && !RetType->isVoidType()) {
10316     CDT = RetType;
10317   } else {
10318     unsigned Offset = 0;
10319     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10320       if (ParamAttrs[Offset].Kind == Vector)
10321         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10322       ++Offset;
10323     }
10324     if (CDT.isNull()) {
10325       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10326         if (ParamAttrs[I + Offset].Kind == Vector) {
10327           CDT = FD->getParamDecl(I)->getType();
10328           break;
10329         }
10330       }
10331     }
10332   }
10333   if (CDT.isNull())
10334     CDT = C.IntTy;
10335   CDT = CDT->getCanonicalTypeUnqualified();
10336   if (CDT->isRecordType() || CDT->isUnionType())
10337     CDT = C.IntTy;
10338   return C.getTypeSize(CDT);
10339 }
10340 
10341 static void
10342 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10343                            const llvm::APSInt &VLENVal,
10344                            ArrayRef<ParamAttrTy> ParamAttrs,
10345                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
10346   struct ISADataTy {
10347     char ISA;
10348     unsigned VecRegSize;
10349   };
10350   ISADataTy ISAData[] = {
10351       {
10352           'b', 128
10353       }, // SSE
10354       {
10355           'c', 256
10356       }, // AVX
10357       {
10358           'd', 256
10359       }, // AVX2
10360       {
10361           'e', 512
10362       }, // AVX512
10363   };
10364   llvm::SmallVector<char, 2> Masked;
10365   switch (State) {
10366   case OMPDeclareSimdDeclAttr::BS_Undefined:
10367     Masked.push_back('N');
10368     Masked.push_back('M');
10369     break;
10370   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10371     Masked.push_back('N');
10372     break;
10373   case OMPDeclareSimdDeclAttr::BS_Inbranch:
10374     Masked.push_back('M');
10375     break;
10376   }
10377   for (char Mask : Masked) {
10378     for (const ISADataTy &Data : ISAData) {
10379       SmallString<256> Buffer;
10380       llvm::raw_svector_ostream Out(Buffer);
10381       Out << "_ZGV" << Data.ISA << Mask;
10382       if (!VLENVal) {
10383         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10384         assert(NumElts && "Non-zero simdlen/cdtsize expected");
10385         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10386       } else {
10387         Out << VLENVal;
10388       }
10389       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
10390         switch (ParamAttr.Kind){
10391         case LinearWithVarStride:
10392           Out << 's' << ParamAttr.StrideOrArg;
10393           break;
10394         case Linear:
10395           Out << 'l';
10396           if (!!ParamAttr.StrideOrArg)
10397             Out << ParamAttr.StrideOrArg;
10398           break;
10399         case Uniform:
10400           Out << 'u';
10401           break;
10402         case Vector:
10403           Out << 'v';
10404           break;
10405         }
10406         if (!!ParamAttr.Alignment)
10407           Out << 'a' << ParamAttr.Alignment;
10408       }
10409       Out << '_' << Fn->getName();
10410       Fn->addFnAttr(Out.str());
10411     }
10412   }
10413 }
10414 
// These are the functions needed to mangle the names of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10420 
10421 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
10422 ///
10423 /// TODO: Need to implement the behavior for reference marked with a
10424 /// var or no linear modifiers (1.b in the section). For this, we
10425 /// need to extend ParamKindTy to support the linear modifiers.
10426 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10427   QT = QT.getCanonicalType();
10428 
10429   if (QT->isVoidType())
10430     return false;
10431 
10432   if (Kind == ParamKindTy::Uniform)
10433     return false;
10434 
10435   if (Kind == ParamKindTy::Linear)
10436     return false;
10437 
10438   // TODO: Handle linear references with modifiers
10439 
10440   if (Kind == ParamKindTy::LinearWithVarStride)
10441     return false;
10442 
10443   return true;
10444 }
10445 
10446 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10447 static bool getAArch64PBV(QualType QT, ASTContext &C) {
10448   QT = QT.getCanonicalType();
10449   unsigned Size = C.getTypeSize(QT);
10450 
10451   // Only scalars and complex within 16 bytes wide set PVB to true.
10452   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10453     return false;
10454 
10455   if (QT->isFloatingType())
10456     return true;
10457 
10458   if (QT->isIntegerType())
10459     return true;
10460 
10461   if (QT->isPointerType())
10462     return true;
10463 
10464   // TODO: Add support for complex types (section 3.1.2, item 2).
10465 
10466   return false;
10467 }
10468 
10469 /// Computes the lane size (LS) of a return type or of an input parameter,
10470 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10471 /// TODO: Add support for references, section 3.2.1, item 1.
10472 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10473   if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10474     QualType PTy = QT.getCanonicalType()->getPointeeType();
10475     if (getAArch64PBV(PTy, C))
10476       return C.getTypeSize(PTy);
10477   }
10478   if (getAArch64PBV(QT, C))
10479     return C.getTypeSize(QT);
10480 
10481   return C.getTypeSize(C.getUIntPtrType());
10482 }
10483 
10484 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10485 // signature of the scalar function, as defined in 3.2.2 of the
10486 // AAVFABI.
10487 static std::tuple<unsigned, unsigned, bool>
10488 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10489   QualType RetType = FD->getReturnType().getCanonicalType();
10490 
10491   ASTContext &C = FD->getASTContext();
10492 
10493   bool OutputBecomesInput = false;
10494 
10495   llvm::SmallVector<unsigned, 8> Sizes;
10496   if (!RetType->isVoidType()) {
10497     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10498     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10499       OutputBecomesInput = true;
10500   }
10501   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10502     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10503     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10504   }
10505 
10506   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10507   // The LS of a function parameter / return value can only be a power
10508   // of 2, starting from 8 bits, up to 128.
10509   assert(std::all_of(Sizes.begin(), Sizes.end(),
10510                      [](unsigned Size) {
10511                        return Size == 8 || Size == 16 || Size == 32 ||
10512                               Size == 64 || Size == 128;
10513                      }) &&
10514          "Invalid size");
10515 
10516   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10517                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
10518                          OutputBecomesInput);
10519 }
10520 
10521 /// Mangle the parameter part of the vector function name according to
10522 /// their OpenMP classification. The mangling function is defined in
10523 /// section 3.5 of the AAVFABI.
10524 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10525   SmallString<256> Buffer;
10526   llvm::raw_svector_ostream Out(Buffer);
10527   for (const auto &ParamAttr : ParamAttrs) {
10528     switch (ParamAttr.Kind) {
10529     case LinearWithVarStride:
10530       Out << "ls" << ParamAttr.StrideOrArg;
10531       break;
10532     case Linear:
10533       Out << 'l';
10534       // Don't print the step value if it is not present or if it is
10535       // equal to 1.
10536       if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1)
10537         Out << ParamAttr.StrideOrArg;
10538       break;
10539     case Uniform:
10540       Out << 'u';
10541       break;
10542     case Vector:
10543       Out << 'v';
10544       break;
10545     }
10546 
10547     if (!!ParamAttr.Alignment)
10548       Out << 'a' << ParamAttr.Alignment;
10549   }
10550 
10551   return Out.str();
10552 }
10553 
10554 // Function used to add the attribute. The parameter `VLEN` is
10555 // templated to allow the use of "x" when targeting scalable functions
10556 // for SVE.
10557 template <typename T>
10558 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10559                                  char ISA, StringRef ParSeq,
10560                                  StringRef MangledName, bool OutputBecomesInput,
10561                                  llvm::Function *Fn) {
10562   SmallString<256> Buffer;
10563   llvm::raw_svector_ostream Out(Buffer);
10564   Out << Prefix << ISA << LMask << VLEN;
10565   if (OutputBecomesInput)
10566     Out << "v";
10567   Out << ParSeq << "_" << MangledName;
10568   Fn->addFnAttr(Out.str());
10569 }
10570 
10571 // Helper function to generate the Advanced SIMD names depending on
10572 // the value of the NDS when simdlen is not present.
10573 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10574                                       StringRef Prefix, char ISA,
10575                                       StringRef ParSeq, StringRef MangledName,
10576                                       bool OutputBecomesInput,
10577                                       llvm::Function *Fn) {
10578   switch (NDS) {
10579   case 8:
10580     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10581                          OutputBecomesInput, Fn);
10582     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10583                          OutputBecomesInput, Fn);
10584     break;
10585   case 16:
10586     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10587                          OutputBecomesInput, Fn);
10588     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10589                          OutputBecomesInput, Fn);
10590     break;
10591   case 32:
10592     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10593                          OutputBecomesInput, Fn);
10594     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10595                          OutputBecomesInput, Fn);
10596     break;
10597   case 64:
10598   case 128:
10599     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10600                          OutputBecomesInput, Fn);
10601     break;
10602   default:
10603     llvm_unreachable("Scalar type is too wide.");
10604   }
10605 }
10606 
10607 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
10608 static void emitAArch64DeclareSimdFunction(
10609     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
10610     ArrayRef<ParamAttrTy> ParamAttrs,
10611     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
10612     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
10613 
10614   // Get basic data for building the vector signature.
10615   const auto Data = getNDSWDS(FD, ParamAttrs);
10616   const unsigned NDS = std::get<0>(Data);
10617   const unsigned WDS = std::get<1>(Data);
10618   const bool OutputBecomesInput = std::get<2>(Data);
10619 
10620   // Check the values provided via `simdlen` by the user.
10621   // 1. A `simdlen(1)` doesn't produce vector signatures,
10622   if (UserVLEN == 1) {
10623     unsigned DiagID = CGM.getDiags().getCustomDiagID(
10624         DiagnosticsEngine::Warning,
10625         "The clause simdlen(1) has no effect when targeting aarch64.");
10626     CGM.getDiags().Report(SLoc, DiagID);
10627     return;
10628   }
10629 
10630   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
10631   // Advanced SIMD output.
10632   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
10633     unsigned DiagID = CGM.getDiags().getCustomDiagID(
10634         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
10635                                     "power of 2 when targeting Advanced SIMD.");
10636     CGM.getDiags().Report(SLoc, DiagID);
10637     return;
10638   }
10639 
10640   // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
10641   // limits.
10642   if (ISA == 's' && UserVLEN != 0) {
10643     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
10644       unsigned DiagID = CGM.getDiags().getCustomDiagID(
10645           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
10646                                       "lanes in the architectural constraints "
10647                                       "for SVE (min is 128-bit, max is "
10648                                       "2048-bit, by steps of 128-bit)");
10649       CGM.getDiags().Report(SLoc, DiagID) << WDS;
10650       return;
10651     }
10652   }
10653 
10654   // Sort out parameter sequence.
10655   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
10656   StringRef Prefix = "_ZGV";
10657   // Generate simdlen from user input (if any).
10658   if (UserVLEN) {
10659     if (ISA == 's') {
10660       // SVE generates only a masked function.
10661       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10662                            OutputBecomesInput, Fn);
10663     } else {
10664       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10665       // Advanced SIMD generates one or two functions, depending on
10666       // the `[not]inbranch` clause.
10667       switch (State) {
10668       case OMPDeclareSimdDeclAttr::BS_Undefined:
10669         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10670                              OutputBecomesInput, Fn);
10671         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10672                              OutputBecomesInput, Fn);
10673         break;
10674       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10675         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10676                              OutputBecomesInput, Fn);
10677         break;
10678       case OMPDeclareSimdDeclAttr::BS_Inbranch:
10679         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10680                              OutputBecomesInput, Fn);
10681         break;
10682       }
10683     }
10684   } else {
10685     // If no user simdlen is provided, follow the AAVFABI rules for
10686     // generating the vector length.
10687     if (ISA == 's') {
10688       // SVE, section 3.4.1, item 1.
10689       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
10690                            OutputBecomesInput, Fn);
10691     } else {
10692       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10693       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
10694       // two vector names depending on the use of the clause
10695       // `[not]inbranch`.
10696       switch (State) {
10697       case OMPDeclareSimdDeclAttr::BS_Undefined:
10698         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10699                                   OutputBecomesInput, Fn);
10700         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10701                                   OutputBecomesInput, Fn);
10702         break;
10703       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10704         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10705                                   OutputBecomesInput, Fn);
10706         break;
10707       case OMPDeclareSimdDeclAttr::BS_Inbranch:
10708         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10709                                   OutputBecomesInput, Fn);
10710         break;
10711       }
10712     }
10713   }
10714 }
10715 
10716 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
10717                                               llvm::Function *Fn) {
10718   ASTContext &C = CGM.getContext();
10719   FD = FD->getMostRecentDecl();
10720   // Map params to their positions in function decl.
10721   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
10722   if (isa<CXXMethodDecl>(FD))
10723     ParamPositions.try_emplace(FD, 0);
10724   unsigned ParamPos = ParamPositions.size();
10725   for (const ParmVarDecl *P : FD->parameters()) {
10726     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
10727     ++ParamPos;
10728   }
10729   while (FD) {
10730     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
10731       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
10732       // Mark uniform parameters.
10733       for (const Expr *E : Attr->uniforms()) {
10734         E = E->IgnoreParenImpCasts();
10735         unsigned Pos;
10736         if (isa<CXXThisExpr>(E)) {
10737           Pos = ParamPositions[FD];
10738         } else {
10739           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10740                                 ->getCanonicalDecl();
10741           Pos = ParamPositions[PVD];
10742         }
10743         ParamAttrs[Pos].Kind = Uniform;
10744       }
10745       // Get alignment info.
10746       auto NI = Attr->alignments_begin();
10747       for (const Expr *E : Attr->aligneds()) {
10748         E = E->IgnoreParenImpCasts();
10749         unsigned Pos;
10750         QualType ParmTy;
10751         if (isa<CXXThisExpr>(E)) {
10752           Pos = ParamPositions[FD];
10753           ParmTy = E->getType();
10754         } else {
10755           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10756                                 ->getCanonicalDecl();
10757           Pos = ParamPositions[PVD];
10758           ParmTy = PVD->getType();
10759         }
10760         ParamAttrs[Pos].Alignment =
10761             (*NI)
10762                 ? (*NI)->EvaluateKnownConstInt(C)
10763                 : llvm::APSInt::getUnsigned(
10764                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
10765                           .getQuantity());
10766         ++NI;
10767       }
10768       // Mark linear parameters.
10769       auto SI = Attr->steps_begin();
10770       auto MI = Attr->modifiers_begin();
10771       for (const Expr *E : Attr->linears()) {
10772         E = E->IgnoreParenImpCasts();
10773         unsigned Pos;
10774         if (isa<CXXThisExpr>(E)) {
10775           Pos = ParamPositions[FD];
10776         } else {
10777           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10778                                 ->getCanonicalDecl();
10779           Pos = ParamPositions[PVD];
10780         }
10781         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
10782         ParamAttr.Kind = Linear;
10783         if (*SI) {
10784           Expr::EvalResult Result;
10785           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
10786             if (const auto *DRE =
10787                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
10788               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
10789                 ParamAttr.Kind = LinearWithVarStride;
10790                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
10791                     ParamPositions[StridePVD->getCanonicalDecl()]);
10792               }
10793             }
10794           } else {
10795             ParamAttr.StrideOrArg = Result.Val.getInt();
10796           }
10797         }
10798         ++SI;
10799         ++MI;
10800       }
10801       llvm::APSInt VLENVal;
10802       SourceLocation ExprLoc;
10803       const Expr *VLENExpr = Attr->getSimdlen();
10804       if (VLENExpr) {
10805         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
10806         ExprLoc = VLENExpr->getExprLoc();
10807       }
10808       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
10809       if (CGM.getTriple().getArch() == llvm::Triple::x86 ||
10810           CGM.getTriple().getArch() == llvm::Triple::x86_64) {
10811         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
10812       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
10813         unsigned VLEN = VLENVal.getExtValue();
10814         StringRef MangledName = Fn->getName();
10815         if (CGM.getTarget().hasFeature("sve"))
10816           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10817                                          MangledName, 's', 128, Fn, ExprLoc);
10818         if (CGM.getTarget().hasFeature("neon"))
10819           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10820                                          MangledName, 'n', 128, Fn, ExprLoc);
10821       }
10822     }
10823     FD = FD->getPreviousDecl();
10824   }
10825 }
10826 
10827 namespace {
10828 /// Cleanup action for doacross support.
10829 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
10830 public:
10831   static const int DoacrossFinArgs = 2;
10832 
10833 private:
10834   llvm::FunctionCallee RTLFn;
10835   llvm::Value *Args[DoacrossFinArgs];
10836 
10837 public:
10838   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
10839                     ArrayRef<llvm::Value *> CallArgs)
10840       : RTLFn(RTLFn) {
10841     assert(CallArgs.size() == DoacrossFinArgs);
10842     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10843   }
10844   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
10845     if (!CGF.HaveInsertPoint())
10846       return;
10847     CGF.EmitRuntimeCall(RTLFn, Args);
10848   }
10849 };
10850 } // namespace
10851 
// Emits the doacross-loop initialization: builds a kmp_dim array describing
// each associated loop's bounds/stride, calls __kmpc_doacross_init, and
// schedules __kmpc_doacross_fini as a cleanup for region exit.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // Build (once, then cache in KmpDimTy) the runtime's dimension record.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  // One kmp_dim entry per associated loop.
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, ArrayType::Normal, 0);

  // Zero-initialize: 'lo' stays 0; only 'up' and 'st' are filled below.
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    // Convert the iteration count to kmp_int64 as the runtime expects.
    llvm::Value *NumIterVal =
        CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]),
                                 D.getNumIterations()->getType(), Int64Ty,
                                 D.getNumIterations()->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn =
      createRuntimeFunction(OMPRTL__kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Register the matching __kmpc_doacross_fini as a normal+EH cleanup so it
  // runs however the region is exited.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn =
      createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
10923 
10924 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
10925                                           const OMPDependClause *C) {
10926   QualType Int64Ty =
10927       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
10928   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
10929   QualType ArrayTy = CGM.getContext().getConstantArrayType(
10930       Int64Ty, Size, ArrayType::Normal, 0);
10931   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
10932   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
10933     const Expr *CounterVal = C->getLoopData(I);
10934     assert(CounterVal);
10935     llvm::Value *CntVal = CGF.EmitScalarConversion(
10936         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
10937         CounterVal->getExprLoc());
10938     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
10939                           /*Volatile=*/false, Int64Ty);
10940   }
10941   llvm::Value *Args[] = {
10942       emitUpdateLocation(CGF, C->getBeginLoc()),
10943       getThreadID(CGF, C->getBeginLoc()),
10944       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
10945   llvm::FunctionCallee RTLFn;
10946   if (C->getDependencyKind() == OMPC_DEPEND_source) {
10947     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
10948   } else {
10949     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
10950     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
10951   }
10952   CGF.EmitRuntimeCall(RTLFn, Args);
10953 }
10954 
10955 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
10956                                llvm::FunctionCallee Callee,
10957                                ArrayRef<llvm::Value *> Args) const {
10958   assert(Loc.isValid() && "Outlined function call location must be valid.");
10959   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
10960 
10961   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
10962     if (Fn->doesNotThrow()) {
10963       CGF.EmitNounwindRuntimeCall(Fn, Args);
10964       return;
10965     }
10966   }
10967   CGF.EmitRuntimeCall(Callee, Args);
10968 }
10969 
// Default implementation: calling an outlined function is a plain emitCall.
// Device runtimes may override this to adjust the calling convention.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
10975 
10976 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
10977   if (const auto *FD = dyn_cast<FunctionDecl>(D))
10978     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
10979       HasEmittedDeclareTargetRegion = true;
10980 }
10981 
// Default implementation: native and target parameters share the same
// local storage; device runtimes may override to translate between them.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
10987 
10988 namespace {
10989 /// Cleanup action for allocate support.
10990 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
10991 public:
10992   static const int CleanupArgs = 3;
10993 
10994 private:
10995   llvm::FunctionCallee RTLFn;
10996   llvm::Value *Args[CleanupArgs];
10997 
10998 public:
10999   OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
11000                        ArrayRef<llvm::Value *> CallArgs)
11001       : RTLFn(RTLFn) {
11002     assert(CallArgs.size() == CleanupArgs &&
11003            "Size of arguments does not match.");
11004     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11005   }
11006   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11007     if (!CGF.HaveInsertPoint())
11008       return;
11009     CGF.EmitRuntimeCall(RTLFn, Args);
11010   }
11011 };
11012 } // namespace
11013 
// Returns a runtime-allocated address for a local variable marked with the
// 'omp allocate' directive; Address::invalid() means "use the default
// alloca-based allocation". Allocates via __kmpc_alloc and registers a
// __kmpc_free cleanup for scope exit.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return Address::invalid();
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
      !AA->getAllocator())
    return Address::invalid();
  llvm::Value *Size;
  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
  if (CVD->getType()->isVariablyModifiedType()) {
    // VLA: the size is only known at run time, so round it up to the
    // declared alignment with IR arithmetic.
    Size = CGF.getTypeSize(CVD->getType());
    // Align the size: ((size + align - 1) / align) * align
    Size = CGF.Builder.CreateNUWAdd(
        Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
  } else {
    // Fixed-size type: round up to the alignment at compile time.
    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
    Size = CGM.getSize(Sz.alignTo(Align));
  }
  llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
  assert(AA->getAllocator() &&
         "Expected allocator expression for non-default allocator.");
  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
  // According to the standard, the original allocator type is a enum (integer).
  // Convert to pointer type, if required.
  if (Allocator->getType()->isIntegerTy())
    Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
  else if (Allocator->getType()->isPointerTy())
    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
                                                                CGM.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  // void *addr = __kmpc_alloc(gtid, size, allocator);
  llvm::Value *Addr =
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args,
                          CVD->getName() + ".void.addr");
  // Schedule __kmpc_free(gtid, addr, allocator) for scope exit (normal
  // and exceptional paths).
  llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
                                                              Allocator};
  llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);

  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                                llvm::makeArrayRef(FiniArgs));
  // Cast the raw void* to a pointer to the variable's type.
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr,
      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
      CVD->getName() + ".addr");
  return Address(Addr, Align);
}
11067 
// Stub: OpenMP runtime calls are not generated in SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11073 
// Stub: OpenMP runtime calls are not generated in SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11079 
// Stub: OpenMP runtime calls are not generated in SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11087 
// Stub: OpenMP runtime calls are not generated in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11095 
// Stub: OpenMP runtime calls are not generated in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11102 
// Stub: OpenMP runtime calls are not generated in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11108 
// Stub: OpenMP runtime calls are not generated in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11113 
// Stub: OpenMP runtime calls are not generated in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11119 
// Stub: OpenMP runtime calls are not generated in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11127 
// Stub: OpenMP runtime calls are not generated in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11134 
// Stub: OpenMP runtime calls are not generated in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11142 
// Stub: OpenMP runtime calls are not generated in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11149 
// Stub: OpenMP runtime calls are not generated in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11155 
// Stub: OpenMP runtime calls are not generated in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11161 
// Stub: OpenMP runtime calls are not generated in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11168 
// Stub: OpenMP runtime calls are not generated in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11174 
// Stub: OpenMP runtime calls are not generated in SIMD-only mode.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11182 
// Stub: OpenMP runtime calls are not generated in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11188 
// Stub: OpenMP runtime calls are not generated in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             OpenMPProcBindClauseKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11194 
// Stub: OpenMP runtime calls are not generated in SIMD-only mode.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11201 
// Stub: OpenMP runtime calls are not generated in SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11207 
// Stub: OpenMP runtime calls are not generated in SIMD-only mode.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11212 
// Stub: OpenMP runtime calls are not generated in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11218 
// Stub: OpenMP runtime calls are not generated in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11227 
// Stub: OpenMP runtime calls are not generated in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11234 
// Simple (serial) reductions need no runtime support, so the base-class
// implementation can be reused even in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
11243 
// Stub: OpenMP runtime calls are not generated in SIMD-only mode.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11249 
// Stub: OpenMP runtime calls are not generated in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11256 
// Stub: OpenMP runtime calls are not generated in SIMD-only mode.
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11263 
// Stub: OpenMP runtime calls are not generated in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11268 
// Stub: OpenMP runtime calls are not generated in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11274 
// Stub: OpenMP runtime calls are not generated in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11280 
// Stub: target offloading is not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11287 
// Stub: target offloading is not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &D,
                                         llvm::Function *OutlinedFn,
                                         llvm::Value *OutlinedFnID,
                                         const Expr *IfCond,
                                         const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11296 
// Stub: target offloading is not supported in SIMD-only mode.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11300 
// Stub: target offloading is not supported in SIMD-only mode.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11304 
// No target offloading in SIMD-only mode: never claim the global, so the
// regular (host) emission path handles it.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
11308 
// No offload-entry registration function is needed in SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitRegistrationFunction() {
  return nullptr;
}
11312 
// Stub: OpenMP runtime calls are not generated in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11320 
// 'num_teams'/'thread_limit' clauses only matter for the full runtime's
// teams support; unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11327 
// 'target data' region begin/end mapping calls require the offload runtime;
// unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11333 
// Stand-alone data-mapping directives ('target enter/exit data', 'target
// update') require the offload runtime; unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11339 
// Doacross ('ordered(n)' cross-iteration dependence) initialization needs
// full-runtime support; unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11345 
// Doacross 'depend(source)'/'depend(sink)' ordering needs full-runtime
// support; unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11350 
// Parameter translation is only used when building target-region outlined
// functions; unreachable in SIMD-only mode.
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11356 
// Companion to translateParameter: maps a translated target-region parameter
// back to an address; unreachable in SIMD-only mode.
Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11363