1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/Attr.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/OpenMPClause.h"
21 #include "clang/AST/StmtOpenMP.h"
22 #include "clang/AST/StmtVisitor.h"
23 #include "clang/Basic/BitmaskEnum.h"
24 #include "clang/Basic/OpenMPKinds.h"
25 #include "clang/CodeGen/ConstantInitBuilder.h"
26 #include "llvm/ADT/ArrayRef.h"
27 #include "llvm/ADT/SetOperations.h"
28 #include "llvm/ADT/StringExtras.h"
29 #include "llvm/Bitcode/BitcodeReader.h"
30 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
31 #include "llvm/IR/DerivedTypes.h"
32 #include "llvm/IR/GlobalValue.h"
33 #include "llvm/IR/Value.h"
34 #include "llvm/Support/AtomicOrdering.h"
35 #include "llvm/Support/Format.h"
36 #include "llvm/Support/raw_ostream.h"
37 #include <cassert>
38 
39 using namespace clang;
40 using namespace CodeGen;
41 using namespace llvm::omp;
42 
43 namespace {
44 /// Base class for handling code generation inside OpenMP regions.
45 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
46 public:
47   /// Kinds of OpenMP regions used in codegen.
48   enum CGOpenMPRegionKind {
49     /// Region with outlined function for standalone 'parallel'
50     /// directive.
51     ParallelOutlinedRegion,
52     /// Region with outlined function for standalone 'task' directive.
53     TaskOutlinedRegion,
54     /// Region for constructs that do not require function outlining,
55     /// like 'for', 'sections', 'atomic' etc. directives.
56     InlinedRegion,
57     /// Region with outlined function for standalone 'target' directive.
58     TargetRegion,
59   };
60 
61   CGOpenMPRegionInfo(const CapturedStmt &CS,
62                      const CGOpenMPRegionKind RegionKind,
63                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
64                      bool HasCancel)
65       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
66         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
67 
68   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
69                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
70                      bool HasCancel)
71       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
72         Kind(Kind), HasCancel(HasCancel) {}
73 
74   /// Get a variable or parameter for storing global thread id
75   /// inside OpenMP construct.
76   virtual const VarDecl *getThreadIDVariable() const = 0;
77 
78   /// Emit the captured statement body.
79   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
80 
81   /// Get an LValue for the current ThreadID variable.
82   /// \return LValue for thread id variable. This LValue always has type int32*.
83   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
84 
85   virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
86 
87   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
88 
89   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
90 
91   bool hasCancel() const { return HasCancel; }
92 
93   static bool classof(const CGCapturedStmtInfo *Info) {
94     return Info->getKind() == CR_OpenMP;
95   }
96 
97   ~CGOpenMPRegionInfo() override = default;
98 
99 protected:
100   CGOpenMPRegionKind RegionKind;
101   RegionCodeGenTy CodeGen;
102   OpenMPDirectiveKind Kind;
103   bool HasCancel;
104 };
105 
106 /// API for captured statement code generation in OpenMP constructs.
107 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
108 public:
109   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
110                              const RegionCodeGenTy &CodeGen,
111                              OpenMPDirectiveKind Kind, bool HasCancel,
112                              StringRef HelperName)
113       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
114                            HasCancel),
115         ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
116     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
117   }
118 
119   /// Get a variable or parameter for storing global thread id
120   /// inside OpenMP construct.
121   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
122 
123   /// Get the name of the capture helper.
124   StringRef getHelperName() const override { return HelperName; }
125 
126   static bool classof(const CGCapturedStmtInfo *Info) {
127     return CGOpenMPRegionInfo::classof(Info) &&
128            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
129                ParallelOutlinedRegion;
130   }
131 
132 private:
133   /// A variable or parameter storing global thread id for OpenMP
134   /// constructs.
135   const VarDecl *ThreadIDVar;
136   StringRef HelperName;
137 };
138 
139 /// API for captured statement code generation in OpenMP constructs.
140 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
141 public:
142   class UntiedTaskActionTy final : public PrePostActionTy {
143     bool Untied;
144     const VarDecl *PartIDVar;
145     const RegionCodeGenTy UntiedCodeGen;
146     llvm::SwitchInst *UntiedSwitch = nullptr;
147 
148   public:
149     UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
150                        const RegionCodeGenTy &UntiedCodeGen)
151         : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
152     void Enter(CodeGenFunction &CGF) override {
153       if (Untied) {
154         // Emit task switching point.
155         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
156             CGF.GetAddrOfLocalVar(PartIDVar),
157             PartIDVar->getType()->castAs<PointerType>());
158         llvm::Value *Res =
159             CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
160         llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
161         UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
162         CGF.EmitBlock(DoneBB);
163         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
164         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
165         UntiedSwitch->addCase(CGF.Builder.getInt32(0),
166                               CGF.Builder.GetInsertBlock());
167         emitUntiedSwitch(CGF);
168       }
169     }
170     void emitUntiedSwitch(CodeGenFunction &CGF) const {
171       if (Untied) {
172         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
173             CGF.GetAddrOfLocalVar(PartIDVar),
174             PartIDVar->getType()->castAs<PointerType>());
175         CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
176                               PartIdLVal);
177         UntiedCodeGen(CGF);
178         CodeGenFunction::JumpDest CurPoint =
179             CGF.getJumpDestInCurrentScope(".untied.next.");
180         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
181         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
182         UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
183                               CGF.Builder.GetInsertBlock());
184         CGF.EmitBranchThroughCleanup(CurPoint);
185         CGF.EmitBlock(CurPoint.getBlock());
186       }
187     }
188     unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
189   };
190   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
191                                  const VarDecl *ThreadIDVar,
192                                  const RegionCodeGenTy &CodeGen,
193                                  OpenMPDirectiveKind Kind, bool HasCancel,
194                                  const UntiedTaskActionTy &Action)
195       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
196         ThreadIDVar(ThreadIDVar), Action(Action) {
197     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
198   }
199 
200   /// Get a variable or parameter for storing global thread id
201   /// inside OpenMP construct.
202   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
203 
204   /// Get an LValue for the current ThreadID variable.
205   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
206 
207   /// Get the name of the capture helper.
208   StringRef getHelperName() const override { return ".omp_outlined."; }
209 
210   void emitUntiedSwitch(CodeGenFunction &CGF) override {
211     Action.emitUntiedSwitch(CGF);
212   }
213 
214   static bool classof(const CGCapturedStmtInfo *Info) {
215     return CGOpenMPRegionInfo::classof(Info) &&
216            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
217                TaskOutlinedRegion;
218   }
219 
220 private:
221   /// A variable or parameter storing global thread id for OpenMP
222   /// constructs.
223   const VarDecl *ThreadIDVar;
224   /// Action for emitting code for untied tasks.
225   const UntiedTaskActionTy &Action;
226 };
227 
228 /// API for inlined captured statement code generation in OpenMP
229 /// constructs.
230 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
231 public:
232   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
233                             const RegionCodeGenTy &CodeGen,
234                             OpenMPDirectiveKind Kind, bool HasCancel)
235       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
236         OldCSI(OldCSI),
237         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
238 
239   // Retrieve the value of the context parameter.
240   llvm::Value *getContextValue() const override {
241     if (OuterRegionInfo)
242       return OuterRegionInfo->getContextValue();
243     llvm_unreachable("No context value for inlined OpenMP region");
244   }
245 
246   void setContextValue(llvm::Value *V) override {
247     if (OuterRegionInfo) {
248       OuterRegionInfo->setContextValue(V);
249       return;
250     }
251     llvm_unreachable("No context value for inlined OpenMP region");
252   }
253 
254   /// Lookup the captured field decl for a variable.
255   const FieldDecl *lookup(const VarDecl *VD) const override {
256     if (OuterRegionInfo)
257       return OuterRegionInfo->lookup(VD);
258     // If there is no outer outlined region,no need to lookup in a list of
259     // captured variables, we can use the original one.
260     return nullptr;
261   }
262 
263   FieldDecl *getThisFieldDecl() const override {
264     if (OuterRegionInfo)
265       return OuterRegionInfo->getThisFieldDecl();
266     return nullptr;
267   }
268 
269   /// Get a variable or parameter for storing global thread id
270   /// inside OpenMP construct.
271   const VarDecl *getThreadIDVariable() const override {
272     if (OuterRegionInfo)
273       return OuterRegionInfo->getThreadIDVariable();
274     return nullptr;
275   }
276 
277   /// Get an LValue for the current ThreadID variable.
278   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
279     if (OuterRegionInfo)
280       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
281     llvm_unreachable("No LValue for inlined OpenMP construct");
282   }
283 
284   /// Get the name of the capture helper.
285   StringRef getHelperName() const override {
286     if (auto *OuterRegionInfo = getOldCSI())
287       return OuterRegionInfo->getHelperName();
288     llvm_unreachable("No helper name for inlined OpenMP construct");
289   }
290 
291   void emitUntiedSwitch(CodeGenFunction &CGF) override {
292     if (OuterRegionInfo)
293       OuterRegionInfo->emitUntiedSwitch(CGF);
294   }
295 
296   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
297 
298   static bool classof(const CGCapturedStmtInfo *Info) {
299     return CGOpenMPRegionInfo::classof(Info) &&
300            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
301   }
302 
303   ~CGOpenMPInlinedRegionInfo() override = default;
304 
305 private:
306   /// CodeGen info about outer OpenMP region.
307   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
308   CGOpenMPRegionInfo *OuterRegionInfo;
309 };
310 
311 /// API for captured statement code generation in OpenMP target
312 /// constructs. For this captures, implicit parameters are used instead of the
313 /// captured fields. The name of the target region has to be unique in a given
314 /// application so it is provided by the client, because only the client has
315 /// the information to generate that.
316 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
317 public:
318   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
319                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
320       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
321                            /*HasCancel=*/false),
322         HelperName(HelperName) {}
323 
324   /// This is unused for target regions because each starts executing
325   /// with a single thread.
326   const VarDecl *getThreadIDVariable() const override { return nullptr; }
327 
328   /// Get the name of the capture helper.
329   StringRef getHelperName() const override { return HelperName; }
330 
331   static bool classof(const CGCapturedStmtInfo *Info) {
332     return CGOpenMPRegionInfo::classof(Info) &&
333            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
334   }
335 
336 private:
337   StringRef HelperName;
338 };
339 
/// Placeholder code generation callback for region infos that only handle
/// expressions. It must never be invoked; reaching it is a logic error.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
343 /// API for generation of expressions captured in a innermost OpenMP
344 /// region.
345 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
346 public:
347   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
348       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
349                                   OMPD_unknown,
350                                   /*HasCancel=*/false),
351         PrivScope(CGF) {
352     // Make sure the globals captured in the provided statement are local by
353     // using the privatization logic. We assume the same variable is not
354     // captured more than once.
355     for (const auto &C : CS.captures()) {
356       if (!C.capturesVariable() && !C.capturesVariableByCopy())
357         continue;
358 
359       const VarDecl *VD = C.getCapturedVar();
360       if (VD->isLocalVarDeclOrParm())
361         continue;
362 
363       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
364                       /*RefersToEnclosingVariableOrCapture=*/false,
365                       VD->getType().getNonReferenceType(), VK_LValue,
366                       C.getLocation());
367       PrivScope.addPrivate(
368           VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
369     }
370     (void)PrivScope.Privatize();
371   }
372 
373   /// Lookup the captured field decl for a variable.
374   const FieldDecl *lookup(const VarDecl *VD) const override {
375     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
376       return FD;
377     return nullptr;
378   }
379 
380   /// Emit the captured statement body.
381   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
382     llvm_unreachable("No body for expressions");
383   }
384 
385   /// Get a variable or parameter for storing global thread id
386   /// inside OpenMP construct.
387   const VarDecl *getThreadIDVariable() const override {
388     llvm_unreachable("No thread id for expressions");
389   }
390 
391   /// Get the name of the capture helper.
392   StringRef getHelperName() const override {
393     llvm_unreachable("No helper name for expressions");
394   }
395 
396   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
397 
398 private:
399   /// Private scope to capture global variables.
400   CodeGenFunction::OMPPrivateScope PrivScope;
401 };
402 
403 /// RAII for emitting code of OpenMP constructs.
404 class InlinedOpenMPRegionRAII {
405   CodeGenFunction &CGF;
406   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
407   FieldDecl *LambdaThisCaptureField = nullptr;
408   const CodeGen::CGBlockInfo *BlockInfo = nullptr;
409 
410 public:
411   /// Constructs region for combined constructs.
412   /// \param CodeGen Code generation sequence for combined directives. Includes
413   /// a list of functions used for code generation of implicitly inlined
414   /// regions.
415   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
416                           OpenMPDirectiveKind Kind, bool HasCancel)
417       : CGF(CGF) {
418     // Start emission for the construct.
419     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
420         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
421     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
422     LambdaThisCaptureField = CGF.LambdaThisCaptureField;
423     CGF.LambdaThisCaptureField = nullptr;
424     BlockInfo = CGF.BlockInfo;
425     CGF.BlockInfo = nullptr;
426   }
427 
428   ~InlinedOpenMPRegionRAII() {
429     // Restore original CapturedStmtInfo only if we're done with code emission.
430     auto *OldCSI =
431         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
432     delete CGF.CapturedStmtInfo;
433     CGF.CapturedStmtInfo = OldCSI;
434     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
435     CGF.LambdaThisCaptureField = LambdaThisCaptureField;
436     CGF.BlockInfo = BlockInfo;
437   }
438 };
439 
440 /// Values for bit flags used in the ident_t to describe the fields.
441 /// All enumeric elements are named and described in accordance with the code
442 /// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
443 enum OpenMPLocationFlags : unsigned {
444   /// Use trampoline for internal microtask.
445   OMP_IDENT_IMD = 0x01,
446   /// Use c-style ident structure.
447   OMP_IDENT_KMPC = 0x02,
448   /// Atomic reduction option for kmpc_reduce.
449   OMP_ATOMIC_REDUCE = 0x10,
450   /// Explicit 'barrier' directive.
451   OMP_IDENT_BARRIER_EXPL = 0x20,
452   /// Implicit barrier in code.
453   OMP_IDENT_BARRIER_IMPL = 0x40,
454   /// Implicit barrier in 'for' directive.
455   OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
456   /// Implicit barrier in 'sections' directive.
457   OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
458   /// Implicit barrier in 'single' directive.
459   OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
460   /// Call of __kmp_for_static_init for static loop.
461   OMP_IDENT_WORK_LOOP = 0x200,
462   /// Call of __kmp_for_static_init for sections.
463   OMP_IDENT_WORK_SECTIONS = 0x400,
464   /// Call of __kmp_for_static_init for distribute.
465   OMP_IDENT_WORK_DISTRIBUTE = 0x800,
466   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
467 };
468 
469 namespace {
470 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
471 /// Values for bit flags for marking which requires clauses have been used.
472 enum OpenMPOffloadingRequiresDirFlags : int64_t {
473   /// flag undefined.
474   OMP_REQ_UNDEFINED               = 0x000,
475   /// no requires clause present.
476   OMP_REQ_NONE                    = 0x001,
477   /// reverse_offload clause.
478   OMP_REQ_REVERSE_OFFLOAD         = 0x002,
479   /// unified_address clause.
480   OMP_REQ_UNIFIED_ADDRESS         = 0x004,
481   /// unified_shared_memory clause.
482   OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
483   /// dynamic_allocators clause.
484   OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
485   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
486 };
487 
/// Device ID values with a reserved meaning for offloading.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
493 } // anonymous namespace
494 
/// Indices of the fields of the ident structure that describes a source
/// location. All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
///
/// The enumerators follow the declaration order of the structure members, so
/// each value is the zero-based position of the corresponding member.
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1 = 0,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags = 1,
  /// Not really used in Fortran any more
  IdentField_Reserved_2 = 2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3 = 3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource = 4
};
535 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h). The numeric values are fixed by that
/// enum and must not be changed.
enum OpenMPSchedType {
  // Default (unordered) versions.
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  /// Schedule used by default; an alias of OMP_sch_static.
  OMP_sch_default = OMP_sch_static,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,

  // 'ordered' versions.
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,

  // dist_schedule types.
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,

  // Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = 1 << 29,
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = 1 << 30,
};
567 
568 enum OpenMPRTLFunction {
569   /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
570   /// kmpc_micro microtask, ...);
571   OMPRTL__kmpc_fork_call,
572   /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
573   /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
574   OMPRTL__kmpc_threadprivate_cached,
575   /// Call to void __kmpc_threadprivate_register( ident_t *,
576   /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
577   OMPRTL__kmpc_threadprivate_register,
  // Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
579   OMPRTL__kmpc_global_thread_num,
580   // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
581   // kmp_critical_name *crit);
582   OMPRTL__kmpc_critical,
583   // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
584   // global_tid, kmp_critical_name *crit, uintptr_t hint);
585   OMPRTL__kmpc_critical_with_hint,
586   // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
587   // kmp_critical_name *crit);
588   OMPRTL__kmpc_end_critical,
589   // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
590   // global_tid);
591   OMPRTL__kmpc_cancel_barrier,
592   // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
593   OMPRTL__kmpc_barrier,
594   // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
595   OMPRTL__kmpc_for_static_fini,
596   // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
597   // global_tid);
598   OMPRTL__kmpc_serialized_parallel,
599   // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
600   // global_tid);
601   OMPRTL__kmpc_end_serialized_parallel,
602   // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
603   // kmp_int32 num_threads);
604   OMPRTL__kmpc_push_num_threads,
605   // Call to void __kmpc_flush(ident_t *loc);
606   OMPRTL__kmpc_flush,
607   // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
608   OMPRTL__kmpc_master,
609   // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
610   OMPRTL__kmpc_end_master,
611   // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
612   // int end_part);
613   OMPRTL__kmpc_omp_taskyield,
614   // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
615   OMPRTL__kmpc_single,
616   // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
617   OMPRTL__kmpc_end_single,
618   // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
619   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
620   // kmp_routine_entry_t *task_entry);
621   OMPRTL__kmpc_omp_task_alloc,
622   // Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *,
623   // kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
624   // size_t sizeof_shareds, kmp_routine_entry_t *task_entry,
625   // kmp_int64 device_id);
626   OMPRTL__kmpc_omp_target_task_alloc,
627   // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
628   // new_task);
629   OMPRTL__kmpc_omp_task,
630   // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
631   // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
632   // kmp_int32 didit);
633   OMPRTL__kmpc_copyprivate,
634   // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
635   // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
636   // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
637   OMPRTL__kmpc_reduce,
638   // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
639   // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
640   // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
641   // *lck);
642   OMPRTL__kmpc_reduce_nowait,
643   // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
644   // kmp_critical_name *lck);
645   OMPRTL__kmpc_end_reduce,
646   // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
647   // kmp_critical_name *lck);
648   OMPRTL__kmpc_end_reduce_nowait,
649   // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
650   // kmp_task_t * new_task);
651   OMPRTL__kmpc_omp_task_begin_if0,
652   // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
653   // kmp_task_t * new_task);
654   OMPRTL__kmpc_omp_task_complete_if0,
655   // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
656   OMPRTL__kmpc_ordered,
657   // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
658   OMPRTL__kmpc_end_ordered,
659   // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
660   // global_tid);
661   OMPRTL__kmpc_omp_taskwait,
662   // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
663   OMPRTL__kmpc_taskgroup,
664   // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
665   OMPRTL__kmpc_end_taskgroup,
666   // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
667   // int proc_bind);
668   OMPRTL__kmpc_push_proc_bind,
669   // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
670   // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
671   // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
672   OMPRTL__kmpc_omp_task_with_deps,
673   // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
674   // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
675   // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
676   OMPRTL__kmpc_omp_wait_deps,
677   // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
678   // global_tid, kmp_int32 cncl_kind);
679   OMPRTL__kmpc_cancellationpoint,
680   // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
681   // kmp_int32 cncl_kind);
682   OMPRTL__kmpc_cancel,
683   // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
684   // kmp_int32 num_teams, kmp_int32 thread_limit);
685   OMPRTL__kmpc_push_num_teams,
686   // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
687   // microtask, ...);
688   OMPRTL__kmpc_fork_teams,
689   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
690   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
691   // sched, kmp_uint64 grainsize, void *task_dup);
692   OMPRTL__kmpc_taskloop,
693   // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
694   // num_dims, struct kmp_dim *dims);
695   OMPRTL__kmpc_doacross_init,
696   // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
697   OMPRTL__kmpc_doacross_fini,
698   // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
699   // *vec);
700   OMPRTL__kmpc_doacross_post,
701   // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
702   // *vec);
703   OMPRTL__kmpc_doacross_wait,
704   // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
705   // *data);
706   OMPRTL__kmpc_task_reduction_init,
707   // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
708   // *d);
709   OMPRTL__kmpc_task_reduction_get_th_data,
710   // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
711   OMPRTL__kmpc_alloc,
712   // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
713   OMPRTL__kmpc_free,
714 
715   //
716   // Offloading related calls
717   //
718   // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
719   // size);
720   OMPRTL__kmpc_push_target_tripcount,
721   // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
722   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
723   // *arg_types);
724   OMPRTL__tgt_target,
725   // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
726   // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
727   // *arg_types);
728   OMPRTL__tgt_target_nowait,
729   // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
730   // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
731   // *arg_types, int32_t num_teams, int32_t thread_limit);
732   OMPRTL__tgt_target_teams,
733   // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
734   // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
735   // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
736   OMPRTL__tgt_target_teams_nowait,
737   // Call to void __tgt_register_requires(int64_t flags);
738   OMPRTL__tgt_register_requires,
739   // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
740   // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
741   OMPRTL__tgt_target_data_begin,
742   // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
743   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
744   // *arg_types);
745   OMPRTL__tgt_target_data_begin_nowait,
  // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
748   OMPRTL__tgt_target_data_end,
749   // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
750   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
751   // *arg_types);
752   OMPRTL__tgt_target_data_end_nowait,
753   // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
754   // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
755   OMPRTL__tgt_target_data_update,
756   // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
757   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
758   // *arg_types);
759   OMPRTL__tgt_target_data_update_nowait,
760   // Call to int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
761   OMPRTL__tgt_mapper_num_components,
762   // Call to void __tgt_push_mapper_component(void *rt_mapper_handle, void
763   // *base, void *begin, int64_t size, int64_t type);
764   OMPRTL__tgt_push_mapper_component,
765 };
766 
767 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
768 /// region.
769 class CleanupTy final : public EHScopeStack::Cleanup {
770   PrePostActionTy *Action;
771 
772 public:
773   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
774   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
775     if (!CGF.HaveInsertPoint())
776       return;
777     Action->Exit(CGF);
778   }
779 };
780 
781 } // anonymous namespace
782 
783 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
784   CodeGenFunction::RunCleanupsScope Scope(CGF);
785   if (PrePostAction) {
786     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
787     Callback(CodeGen, CGF, *PrePostAction);
788   } else {
789     PrePostActionTy Action;
790     Callback(CodeGen, CGF, Action);
791   }
792 }
793 
794 /// Check if the combiner is a call to UDR combiner and if it is so return the
795 /// UDR decl used for reduction.
796 static const OMPDeclareReductionDecl *
797 getReductionInit(const Expr *ReductionOp) {
798   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
799     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
800       if (const auto *DRE =
801               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
802         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
803           return DRD;
804   return nullptr;
805 }
806 
/// Emit the initialization of one private reduction copy.
///
/// If \p DRD has an initializer, \p InitOp (a call expression) is emitted
/// with its opaque callee bound to the initializer function of the
/// user-defined reduction and with the variables referenced by its two
/// arguments remapped to \p Private and \p Original. Otherwise \p Private is
/// initialized from a constant global holding the null constant of \p Ty.
/// \param DRD User-defined reduction declaration; dereferenced
/// unconditionally, so it must not be null.
/// \param InitOp Initializer call expression; only used when \p DRD has an
/// initializer.
/// \param Private Address of the private copy being initialized.
/// \param Original Address of the original item; only used when \p DRD has an
/// initializer.
/// \param Ty Type of the item being initialized.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // (combiner, initializer) pair for this declaration.
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    // Both call arguments are expected to be unary operators applied to
    // references to the helper variables (the casts below assert this).
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Remap the first helper variable to the private copy and the second one
    // to the original item while the call is emitted.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // Bind the opaque callee to the initializer function and emit the call.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No initializer clause: initialize from a private constant global that
    // holds the null constant of the item type.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    // Turn the global into an rvalue of the matching evaluation kind.
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress(CGF));
      break;
    }
    // Emit the store through a temporary opaque expression bound to that
    // rvalue.
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
858 
/// Emit element-by-element initialization of an array.
/// \param DestAddr Address of the array being initialized.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, each element is initialized via
/// emitInitWithReductionInitializer(); otherwise \p Init is emitted into each
/// element.
/// \param Init Initial expression of array.
/// \param DRD User-defined reduction declaration, or nullptr. When non-null,
/// the source array is walked in lock-step with the destination array.
/// \param SrcAddr Address of the original array; only used when \p DRD is
/// non-null.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  // emitArrayLength() also fills in ElementTy, which is used below.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  // Give the source address the same element type as the destination.
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // One-past-the-end pointer of the destination array.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is an emptiness check followed by a loop whose
  // exit condition is tested at the end of the body.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI for the current source element (only when a source array is walked).
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  // PHI for the current destination element.
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit the initialization of the current element inside its own cleanups
  // scope.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the source address forward by one element.
    // NOTE: the IR value name below says "dest" although this is the source
    // element pointer.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the destination address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // The back edge comes from whatever block the body ended in.
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
947 
948 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
949   return CGF.EmitOMPSharedLValue(E);
950 }
951 
952 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
953                                             const Expr *E) {
954   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
955     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
956   return LValue();
957 }
958 
959 void ReductionCodeGen::emitAggregateInitialization(
960     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
961     const OMPDeclareReductionDecl *DRD) {
962   // Emit VarDecl with copy init for arrays.
963   // Get the address of the original variable captured in current
964   // captured region.
965   const auto *PrivateVD =
966       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
967   bool EmitDeclareReductionInit =
968       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
969   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
970                        EmitDeclareReductionInit,
971                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
972                                                 : PrivateVD->getInit(),
973                        DRD, SharedLVal.getAddress(CGF));
974 }
975 
976 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
977                                    ArrayRef<const Expr *> Privates,
978                                    ArrayRef<const Expr *> ReductionOps) {
979   ClausesData.reserve(Shareds.size());
980   SharedAddresses.reserve(Shareds.size());
981   Sizes.reserve(Shareds.size());
982   BaseDecls.reserve(Shareds.size());
983   auto IPriv = Privates.begin();
984   auto IRed = ReductionOps.begin();
985   for (const Expr *Ref : Shareds) {
986     ClausesData.emplace_back(Ref, *IPriv, *IRed);
987     std::advance(IPriv, 1);
988     std::advance(IRed, 1);
989   }
990 }
991 
992 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
993   assert(SharedAddresses.size() == N &&
994          "Number of generated lvalues must be exactly N.");
995   LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
996   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
997   SharedAddresses.emplace_back(First, Second);
998 }
999 
/// Compute the size of the N-th reduction item and record it in Sizes as a
/// (size in chars, number of elements) pair. For variably modified private
/// types the array size expression of the private type is additionally bound
/// to the computed element count and the type is emitted.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Not variably modified: record only the size of the shared item's type;
    // there is no element count.
    Sizes.emplace_back(
        CGF.getTypeSize(
            SharedAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  // Element type is taken from the pointee type of the shared item pointer.
  auto *ElemType = cast<llvm::PointerType>(
                       SharedAddresses[N].first.getPointer(CGF)->getType())
                       ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Array section: element count is (upper bound - first element) + 1, and
    // the size in chars is that count times the element size.
    Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(CGF),
                                     SharedAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Otherwise: take the size in chars from the type and divide by the
    // element size to get the element count.
    SizeInChars = CGF.getTypeSize(
        SharedAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the opaque size expression of the private array type to the
  // computed element count while the variably modified type is emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
1037 
1038 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
1039                                          llvm::Value *Size) {
1040   const auto *PrivateVD =
1041       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1042   QualType PrivateType = PrivateVD->getType();
1043   if (!PrivateType->isVariablyModifiedType()) {
1044     assert(!Size && !Sizes[N].second &&
1045            "Size should be nullptr for non-variably modified reduction "
1046            "items.");
1047     return;
1048   }
1049   CodeGenFunction::OpaqueValueMapping OpaqueMap(
1050       CGF,
1051       cast<OpaqueValueExpr>(
1052           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
1053       RValue::get(Size));
1054   CGF.EmitVariablyModifiedType(PrivateType);
1055 }
1056 
1057 void ReductionCodeGen::emitInitialization(
1058     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
1059     llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
1060   assert(SharedAddresses.size() > N && "No variable was generated");
1061   const auto *PrivateVD =
1062       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1063   const OMPDeclareReductionDecl *DRD =
1064       getReductionInit(ClausesData[N].ReductionOp);
1065   QualType PrivateType = PrivateVD->getType();
1066   PrivateAddr = CGF.Builder.CreateElementBitCast(
1067       PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1068   QualType SharedType = SharedAddresses[N].first.getType();
1069   SharedLVal = CGF.MakeAddrLValue(
1070       CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
1071                                        CGF.ConvertTypeForMem(SharedType)),
1072       SharedType, SharedAddresses[N].first.getBaseInfo(),
1073       CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
1074   if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
1075     emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
1076   } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
1077     emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
1078                                      PrivateAddr, SharedLVal.getAddress(CGF),
1079                                      SharedLVal.getType());
1080   } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
1081              !CGF.isTrivialInitializer(PrivateVD->getInit())) {
1082     CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
1083                          PrivateVD->getType().getQualifiers(),
1084                          /*IsInitializer=*/false);
1085   }
1086 }
1087 
1088 bool ReductionCodeGen::needCleanups(unsigned N) {
1089   const auto *PrivateVD =
1090       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1091   QualType PrivateType = PrivateVD->getType();
1092   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1093   return DTorKind != QualType::DK_none;
1094 }
1095 
1096 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
1097                                     Address PrivateAddr) {
1098   const auto *PrivateVD =
1099       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1100   QualType PrivateType = PrivateVD->getType();
1101   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1102   if (needCleanups(N)) {
1103     PrivateAddr = CGF.Builder.CreateElementBitCast(
1104         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1105     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
1106   }
1107 }
1108 
1109 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
1110                           LValue BaseLV) {
1111   BaseTy = BaseTy.getNonReferenceType();
1112   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1113          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
1114     if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
1115       BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
1116     } else {
1117       LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
1118       BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
1119     }
1120     BaseTy = BaseTy->getPointeeType();
1121   }
1122   return CGF.MakeAddrLValue(
1123       CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
1124                                        CGF.ConvertTypeForMem(ElTy)),
1125       BaseLV.getType(), BaseLV.getBaseInfo(),
1126       CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
1127 }
1128 
/// Wrap \p Addr so that it can stand in for a base variable of type
/// \p BaseTy.
///
/// For every pointer/reference level of \p BaseTy (until the type matches
/// \p ElTy) a memory temporary is created, and each temporary is stored into
/// the one created before it. \p Addr is cast and stored into the last
/// temporary and the first temporary is returned. If there is no such level,
/// \p Addr is cast to \p BaseLVType and returned with \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();        // Most recently created temporary.
  Address TopTmp = Address::invalid();     // Temporary created one level up.
  Address MostTopTmp = Address::invalid(); // First temporary of the chain.
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    // Link the new temporary into the previous one, or remember it as the
    // head of the chain.
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  // Cast the address to the element type of the last temporary, or to the
  // base lvalue type when no temporary was needed.
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
1156 
1157 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
1158   const VarDecl *OrigVD = nullptr;
1159   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
1160     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
1161     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
1162       Base = TempOASE->getBase()->IgnoreParenImpCasts();
1163     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1164       Base = TempASE->getBase()->IgnoreParenImpCasts();
1165     DE = cast<DeclRefExpr>(Base);
1166     OrigVD = cast<VarDecl>(DE->getDecl());
1167   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
1168     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
1169     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1170       Base = TempASE->getBase()->IgnoreParenImpCasts();
1171     DE = cast<DeclRefExpr>(Base);
1172     OrigVD = cast<VarDecl>(DE->getDecl());
1173   }
1174   return OrigVD;
1175 }
1176 
1177 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
1178                                                Address PrivateAddr) {
1179   const DeclRefExpr *DE;
1180   if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
1181     BaseDecls.emplace_back(OrigVD);
1182     LValue OriginalBaseLValue = CGF.EmitLValue(DE);
1183     LValue BaseLValue =
1184         loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1185                     OriginalBaseLValue);
1186     llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1187         BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
1188     llvm::Value *PrivatePointer =
1189         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1190             PrivateAddr.getPointer(),
1191             SharedAddresses[N].first.getAddress(CGF).getType());
1192     llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
1193     return castToBase(CGF, OrigVD->getType(),
1194                       SharedAddresses[N].first.getType(),
1195                       OriginalBaseLValue.getAddress(CGF).getType(),
1196                       OriginalBaseLValue.getAlignment(), Ptr);
1197   }
1198   BaseDecls.emplace_back(
1199       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1200   return PrivateAddr;
1201 }
1202 
1203 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1204   const OMPDeclareReductionDecl *DRD =
1205       getReductionInit(ClausesData[N].ReductionOp);
1206   return DRD && DRD->getInitializer();
1207 }
1208 
1209 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1210   return CGF.EmitLoadOfPointerLValue(
1211       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1212       getThreadIDVariable()->getType()->castAs<PointerType>());
1213 }
1214 
1215 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
1216   if (!CGF.HaveInsertPoint())
1217     return;
1218   // 1.2.2 OpenMP Language Terminology
1219   // Structured block - An executable statement with a single entry at the
1220   // top and a single exit at the bottom.
1221   // The point of exit cannot be a branch out of the structured block.
1222   // longjmp() and throw() must not violate the entry/exit criteria.
1223   CGF.EHStack.pushTerminate();
1224   CodeGen(CGF);
1225   CGF.EHStack.popTerminate();
1226 }
1227 
1228 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1229     CodeGenFunction &CGF) {
1230   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1231                             getThreadIDVariable()->getType(),
1232                             AlignmentSource::Decl);
1233 }
1234 
1235 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1236                                        QualType FieldTy) {
1237   auto *Field = FieldDecl::Create(
1238       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1239       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1240       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1241   Field->setAccess(AS_public);
1242   DC->addDecl(Field);
1243   return Field;
1244 }
1245 
1246 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
1247                                  StringRef Separator)
1248     : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
1249       OffloadEntriesInfoManager(CGM) {
1250   ASTContext &C = CGM.getContext();
1251   RecordDecl *RD = C.buildImplicitRecord("ident_t");
1252   QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1253   RD->startDefinition();
1254   // reserved_1
1255   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1256   // flags
1257   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1258   // reserved_2
1259   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1260   // reserved_3
1261   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1262   // psource
1263   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
1264   RD->completeDefinition();
1265   IdentQTy = C.getRecordType(RD);
1266   IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
1267   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1268 
1269   loadOffloadInfoMetadata();
1270 }
1271 
/// Try to emit an alias that carries the mangled name of \p OldGD and refers
/// to the definition of \p NewGD.
/// \param NewGD Declaration whose definition becomes the aliasee.
/// \param OldGD Declaration whose name, linkage and common attributes are
/// applied to the alias.
/// \param OrigAddr Already emitted declaration for \p OldGD, or nullptr. If
/// given, its uses are redirected to the alias and it is erased.
/// \param IsForDefinition If true (or if \p OrigAddr is given), the address
/// of \p NewGD is requested before looking up its definition.
/// \returns true if the alias was created, false if \p NewGD has no
/// definition in the module.
bool CGOpenMPRuntime::tryEmitDeclareVariant(const GlobalDecl &NewGD,
                                            const GlobalDecl &OldGD,
                                            llvm::GlobalValue *OrigAddr,
                                            bool IsForDefinition) {
  // Emit at least a definition for the aliasee if the address of the
  // original function is requested.
  if (IsForDefinition || OrigAddr)
    (void)CGM.GetAddrOfGlobal(NewGD);
  StringRef NewMangledName = CGM.getMangledName(NewGD);
  llvm::GlobalValue *Addr = CGM.GetGlobalValue(NewMangledName);
  // An alias can only be created for an aliasee that is defined.
  if (Addr && !Addr->isDeclaration()) {
    const auto *D = cast<FunctionDecl>(OldGD.getDecl());
    const CGFunctionInfo &FI = CGM.getTypes().arrangeGlobalDeclaration(NewGD);
    llvm::Type *DeclTy = CGM.getTypes().GetFunctionType(FI);

    // Create a reference to the named value.  This ensures that it is emitted
    // if a deferred decl.
    // The linkage for the alias is taken from the old declaration.
    llvm::GlobalValue::LinkageTypes LT = CGM.getFunctionLinkage(OldGD);

    // Create the new alias itself, but don't set a name yet.
    auto *GA =
        llvm::GlobalAlias::create(DeclTy, 0, LT, "", Addr, &CGM.getModule());

    if (OrigAddr) {
      assert(OrigAddr->isDeclaration() && "Expected declaration");

      // Take over the name of the existing declaration, redirect all of its
      // uses to the alias and remove it from the module.
      GA->takeName(OrigAddr);
      OrigAddr->replaceAllUsesWith(
          llvm::ConstantExpr::getBitCast(GA, OrigAddr->getType()));
      OrigAddr->eraseFromParent();
    } else {
      GA->setName(CGM.getMangledName(OldGD));
    }

    // Set attributes which are particular to an alias; this is a
    // specialization of the attributes which may be set on a global function.
    if (D->hasAttr<WeakAttr>() || D->hasAttr<WeakRefAttr>() ||
        D->isWeakImported())
      GA->setLinkage(llvm::Function::WeakAnyLinkage);

    CGM.SetCommonAttributes(OldGD, GA);
    return true;
  }
  return false;
}
1317 
1318 void CGOpenMPRuntime::clear() {
1319   InternalVars.clear();
1320   // Clean non-target variable declarations possibly used only in debug info.
1321   for (const auto &Data : EmittedNonTargetVariables) {
1322     if (!Data.getValue().pointsToAliveValue())
1323       continue;
1324     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1325     if (!GV)
1326       continue;
1327     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1328       continue;
1329     GV->eraseFromParent();
1330   }
1331   // Emit aliases for the deferred aliasees.
1332   for (const auto &Pair : DeferredVariantFunction) {
1333     StringRef MangledName = CGM.getMangledName(Pair.second.second);
1334     llvm::GlobalValue *Addr = CGM.GetGlobalValue(MangledName);
1335     // If not able to emit alias, just emit original declaration.
1336     (void)tryEmitDeclareVariant(Pair.second.first, Pair.second.second, Addr,
1337                                 /*IsForDefinition=*/false);
1338   }
1339 }
1340 
1341 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1342   SmallString<128> Buffer;
1343   llvm::raw_svector_ostream OS(Buffer);
1344   StringRef Sep = FirstSeparator;
1345   for (StringRef Part : Parts) {
1346     OS << Sep << Part;
1347     Sep = Separator;
1348   }
1349   return std::string(OS.str());
1350 }
1351 
/// Emit the internal helper function for the combiner or the initializer of
/// a user-defined reduction.
/// \param Ty Type of the reduction; both parameters of the helper are
/// restrict-qualified pointers to it.
/// \param CombinerInitializer Expression emitted as the body of the helper;
/// may be null, in which case no expression is emitted for it.
/// \param In Variable mapped to the pointee of the second parameter.
/// \param Out Variable mapped to the pointee of the first parameter.
/// \param IsCombiner Selects the helper's name and, when false, additionally
/// emits the non-trivial initializer of \p Out (if any) before
/// \p CombinerInitializer.
/// \returns The emitted function.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *out, Ty *in);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  // The 'out' parameter comes first, the 'in' parameter second.
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // When optimizing, request that the helper is always inlined.
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // For an initializer helper, first emit the non-trivial initializer of the
  // 'out' variable (if it has one) into the remapped variable.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1408 
1409 void CGOpenMPRuntime::emitUserDefinedReduction(
1410     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1411   if (UDRMap.count(D) > 0)
1412     return;
1413   llvm::Function *Combiner = emitCombinerOrInitializer(
1414       CGM, D->getType(), D->getCombiner(),
1415       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1416       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1417       /*IsCombiner=*/true);
1418   llvm::Function *Initializer = nullptr;
1419   if (const Expr *Init = D->getInitializer()) {
1420     Initializer = emitCombinerOrInitializer(
1421         CGM, D->getType(),
1422         D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1423                                                                      : nullptr,
1424         cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1425         cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1426         /*IsCombiner=*/false);
1427   }
1428   UDRMap.try_emplace(D, Combiner, Initializer);
1429   if (CGF) {
1430     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1431     Decls.second.push_back(D);
1432   }
1433 }
1434 
1435 std::pair<llvm::Function *, llvm::Function *>
1436 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1437   auto I = UDRMap.find(D);
1438   if (I != UDRMap.end())
1439     return I->second;
1440   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1441   return UDRMap.lookup(D);
1442 }
1443 
1444 namespace {
1445 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1446 // Builder if one is present.
1447 struct PushAndPopStackRAII {
1448   PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
1449                       bool HasCancel)
1450       : OMPBuilder(OMPBuilder) {
1451     if (!OMPBuilder)
1452       return;
1453 
1454     // The following callback is the crucial part of clangs cleanup process.
1455     //
1456     // NOTE:
1457     // Once the OpenMPIRBuilder is used to create parallel regions (and
1458     // similar), the cancellation destination (Dest below) is determined via
1459     // IP. That means if we have variables to finalize we split the block at IP,
1460     // use the new block (=BB) as destination to build a JumpDest (via
1461     // getJumpDestInCurrentScope(BB)) which then is fed to
1462     // EmitBranchThroughCleanup. Furthermore, there will not be the need
1463     // to push & pop an FinalizationInfo object.
1464     // The FiniCB will still be needed but at the point where the
1465     // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
1466     auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
1467       assert(IP.getBlock()->end() == IP.getPoint() &&
1468              "Clang CG should cause non-terminated block!");
1469       CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1470       CGF.Builder.restoreIP(IP);
1471       CodeGenFunction::JumpDest Dest =
1472           CGF.getOMPCancelDestination(OMPD_parallel);
1473       CGF.EmitBranchThroughCleanup(Dest);
1474     };
1475 
1476     // TODO: Remove this once we emit parallel regions through the
1477     //       OpenMPIRBuilder as it can do this setup internally.
1478     llvm::OpenMPIRBuilder::FinalizationInfo FI(
1479         {FiniCB, OMPD_parallel, HasCancel});
1480     OMPBuilder->pushFinalizationCB(std::move(FI));
1481   }
1482   ~PushAndPopStackRAII() {
1483     if (OMPBuilder)
1484       OMPBuilder->popFinalizationCB();
1485   }
1486   llvm::OpenMPIRBuilder *OMPBuilder;
1487 };
1488 } // namespace
1489 
1490 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1491     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1492     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1493     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1494   assert(ThreadIDVar->getType()->isPointerType() &&
1495          "thread id variable must be of type kmp_int32 *");
1496   CodeGenFunction CGF(CGM, true);
1497   bool HasCancel = false;
1498   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1499     HasCancel = OPD->hasCancel();
1500   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1501     HasCancel = OPSD->hasCancel();
1502   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1503     HasCancel = OPFD->hasCancel();
1504   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1505     HasCancel = OPFD->hasCancel();
1506   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1507     HasCancel = OPFD->hasCancel();
1508   else if (const auto *OPFD =
1509                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1510     HasCancel = OPFD->hasCancel();
1511   else if (const auto *OPFD =
1512                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1513     HasCancel = OPFD->hasCancel();
1514 
1515   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1516   //       parallel region to make cancellation barriers work properly.
1517   llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder();
1518   PushAndPopStackRAII PSR(OMPBuilder, CGF, HasCancel);
1519   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1520                                     HasCancel, OutlinedHelperName);
1521   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1522   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1523 }
1524 
1525 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1526     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1527     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1528   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1529   return emitParallelOrTeamsOutlinedFunction(
1530       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1531 }
1532 
1533 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1534     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1535     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1536   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1537   return emitParallelOrTeamsOutlinedFunction(
1538       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1539 }
1540 
1541 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
1542     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1543     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1544     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1545     bool Tied, unsigned &NumberOfParts) {
1546   auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1547                                               PrePostActionTy &) {
1548     llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1549     llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1550     llvm::Value *TaskArgs[] = {
1551         UpLoc, ThreadID,
1552         CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1553                                     TaskTVar->getType()->castAs<PointerType>())
1554             .getPointer(CGF)};
1555     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
1556   };
1557   CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1558                                                             UntiedCodeGen);
1559   CodeGen.setAction(Action);
1560   assert(!ThreadIDVar->getType()->isPointerType() &&
1561          "thread id variable must be of type kmp_int32 for tasks");
1562   const OpenMPDirectiveKind Region =
1563       isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1564                                                       : OMPD_task;
1565   const CapturedStmt *CS = D.getCapturedStmt(Region);
1566   bool HasCancel = false;
1567   if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
1568     HasCancel = TD->hasCancel();
1569   else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
1570     HasCancel = TD->hasCancel();
1571   else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
1572     HasCancel = TD->hasCancel();
1573   else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
1574     HasCancel = TD->hasCancel();
1575 
1576   CodeGenFunction CGF(CGM, true);
1577   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1578                                         InnermostKind, HasCancel, Action);
1579   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1580   llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
1581   if (!Tied)
1582     NumberOfParts = Action.getNumberOfParts();
1583   return Res;
1584 }
1585 
1586 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1587                              const RecordDecl *RD, const CGRecordLayout &RL,
1588                              ArrayRef<llvm::Constant *> Data) {
1589   llvm::StructType *StructTy = RL.getLLVMType();
1590   unsigned PrevIdx = 0;
1591   ConstantInitBuilder CIBuilder(CGM);
1592   auto DI = Data.begin();
1593   for (const FieldDecl *FD : RD->fields()) {
1594     unsigned Idx = RL.getLLVMFieldNo(FD);
1595     // Fill the alignment.
1596     for (unsigned I = PrevIdx; I < Idx; ++I)
1597       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1598     PrevIdx = Idx + 1;
1599     Fields.add(*DI);
1600     ++DI;
1601   }
1602 }
1603 
1604 template <class... As>
1605 static llvm::GlobalVariable *
1606 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1607                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1608                    As &&... Args) {
1609   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1610   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1611   ConstantInitBuilder CIBuilder(CGM);
1612   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1613   buildStructValue(Fields, CGM, RD, RL, Data);
1614   return Fields.finishAndCreateGlobal(
1615       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1616       std::forward<As>(Args)...);
1617 }
1618 
1619 template <typename T>
1620 static void
1621 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1622                                          ArrayRef<llvm::Constant *> Data,
1623                                          T &Parent) {
1624   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1625   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1626   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1627   buildStructValue(Fields, CGM, RD, RL, Data);
1628   Fields.finishAndAddTo(Parent);
1629 }
1630 
1631 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
1632   CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
1633   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1634   FlagsTy FlagsKey(Flags, Reserved2Flags);
1635   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
1636   if (!Entry) {
1637     if (!DefaultOpenMPPSource) {
1638       // Initialize default location for psource field of ident_t structure of
1639       // all ident_t objects. Format is ";file;function;line;column;;".
1640       // Taken from
1641       // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
1642       DefaultOpenMPPSource =
1643           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
1644       DefaultOpenMPPSource =
1645           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
1646     }
1647 
1648     llvm::Constant *Data[] = {
1649         llvm::ConstantInt::getNullValue(CGM.Int32Ty),
1650         llvm::ConstantInt::get(CGM.Int32Ty, Flags),
1651         llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
1652         llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
1653     llvm::GlobalValue *DefaultOpenMPLocation =
1654         createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
1655                            llvm::GlobalValue::PrivateLinkage);
1656     DefaultOpenMPLocation->setUnnamedAddr(
1657         llvm::GlobalValue::UnnamedAddr::Global);
1658 
1659     OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
1660   }
1661   return Address(Entry, Align);
1662 }
1663 
1664 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1665                                              bool AtCurrentPoint) {
1666   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1667   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1668 
1669   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1670   if (AtCurrentPoint) {
1671     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1672         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1673   } else {
1674     Elem.second.ServiceInsertPt =
1675         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1676     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1677   }
1678 }
1679 
1680 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1681   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1682   if (Elem.second.ServiceInsertPt) {
1683     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1684     Elem.second.ServiceInsertPt = nullptr;
1685     Ptr->eraseFromParent();
1686   }
1687 }
1688 
/// Returns a pointer to an ident_t object describing \p Loc.
///
/// OMP_IDENT_KMPC is always OR-ed into \p Flags. If debug info is disabled or
/// \p Loc is invalid, the pointer to the default location global for these
/// flags is returned. Otherwise a per-function ".kmpc_loc.addr" temporary is
/// used (created on first use and remembered in OpenMPLocThreadIDMap), its
/// psource field is overwritten with the ";file;function;line;column;;"
/// string for \p Loc, and the temporary's pointer is returned.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  Address LocValue = Address::invalid();
  // Reuse the location temporary recorded for this function, if any.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, Align);

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // GetOpenMPThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    // Initialize the temporary at the function's service insert point by
    // copying the default location for these flags into it. The guard
    // restores the builder's insert point when this scope ends.
    if (!Elem.second.ServiceInsertPt)
      setLocThreadIdInsertPt(CGF);
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGF.getTypeSize(IdentQTy));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
  LValue PSource =
      CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));

  // The location string is cached per raw encoding of Loc and is built only
  // on the first request for that location.
  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    // The function-name component is left empty when the current declaration
    // is not a FunctionDecl.
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      OS2 << FD->getQualifiedNameAsString();
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}
1749 
/// Returns the thread id value to use in the current function.
///
/// A value already recorded for the function in OpenMPLocThreadIDMap is
/// reused. Otherwise, inside an OpenMP region that has a thread id variable,
/// the id is loaded from that variable when the conditions below allow it.
/// In all remaining cases a call to __kmpc_global_thread_num is emitted at
/// the function's service insert point and its result is recorded for reuse.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Load from the variable if no landing pad is required, or C++
      // exceptions are off, or we are emitting into the block that holds
      // AllocaInsertPt, or the variable's pointer is not an instruction, or
      // that instruction lives in that block or in the current insert block.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // The thread id variable could not be used - need to call
  // kmp_int32 __kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the call at the service insert point; the guard restores the
  // builder's previous insert point on return.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1806 
1807 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1808   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1809   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1810     clearLocThreadIdInsertPt(CGF);
1811     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1812   }
1813   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1814     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1815       UDRMap.erase(D);
1816     FunctionUDRMap.erase(CGF.CurFn);
1817   }
1818   auto I = FunctionUDMMap.find(CGF.CurFn);
1819   if (I != FunctionUDMMap.end()) {
1820     for(const auto *D : I->second)
1821       UDMMap.erase(D);
1822     FunctionUDMMap.erase(I);
1823   }
1824   LastprivateConditionalToTypes.erase(CGF.CurFn);
1825 }
1826 
1827 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1828   return IdentTy->getPointerTo();
1829 }
1830 
1831 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1832   if (!Kmpc_MicroTy) {
1833     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1834     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1835                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1836     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1837   }
1838   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1839 }
1840 
1841 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1842   llvm::FunctionCallee RTLFn = nullptr;
1843   switch (static_cast<OpenMPRTLFunction>(Function)) {
1844   case OMPRTL__kmpc_fork_call: {
1845     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1846     // microtask, ...);
1847     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1848                                 getKmpc_MicroPointerTy()};
1849     auto *FnTy =
1850         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1851     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1852     if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
1853       if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
1854         llvm::LLVMContext &Ctx = F->getContext();
1855         llvm::MDBuilder MDB(Ctx);
1856         // Annotate the callback behavior of the __kmpc_fork_call:
1857         //  - The callback callee is argument number 2 (microtask).
1858         //  - The first two arguments of the callback callee are unknown (-1).
1859         //  - All variadic arguments to the __kmpc_fork_call are passed to the
1860         //    callback callee.
1861         F->addMetadata(
1862             llvm::LLVMContext::MD_callback,
1863             *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
1864                                         2, {-1, -1},
1865                                         /* VarArgsArePassed */ true)}));
1866       }
1867     }
1868     break;
1869   }
1870   case OMPRTL__kmpc_global_thread_num: {
1871     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1872     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1873     auto *FnTy =
1874         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1875     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1876     break;
1877   }
1878   case OMPRTL__kmpc_threadprivate_cached: {
1879     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1880     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1881     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1882                                 CGM.VoidPtrTy, CGM.SizeTy,
1883                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1884     auto *FnTy =
1885         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1886     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1887     break;
1888   }
1889   case OMPRTL__kmpc_critical: {
1890     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1891     // kmp_critical_name *crit);
1892     llvm::Type *TypeParams[] = {
1893         getIdentTyPointerTy(), CGM.Int32Ty,
1894         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1895     auto *FnTy =
1896         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1897     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1898     break;
1899   }
1900   case OMPRTL__kmpc_critical_with_hint: {
1901     // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1902     // kmp_critical_name *crit, uintptr_t hint);
1903     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1904                                 llvm::PointerType::getUnqual(KmpCriticalNameTy),
1905                                 CGM.IntPtrTy};
1906     auto *FnTy =
1907         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1908     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1909     break;
1910   }
1911   case OMPRTL__kmpc_threadprivate_register: {
1912     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1913     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1914     // typedef void *(*kmpc_ctor)(void *);
1915     auto *KmpcCtorTy =
1916         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1917                                 /*isVarArg*/ false)->getPointerTo();
1918     // typedef void *(*kmpc_cctor)(void *, void *);
1919     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1920     auto *KmpcCopyCtorTy =
1921         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1922                                 /*isVarArg*/ false)
1923             ->getPointerTo();
1924     // typedef void (*kmpc_dtor)(void *);
1925     auto *KmpcDtorTy =
1926         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1927             ->getPointerTo();
1928     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1929                               KmpcCopyCtorTy, KmpcDtorTy};
1930     auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1931                                         /*isVarArg*/ false);
1932     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1933     break;
1934   }
1935   case OMPRTL__kmpc_end_critical: {
1936     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1937     // kmp_critical_name *crit);
1938     llvm::Type *TypeParams[] = {
1939         getIdentTyPointerTy(), CGM.Int32Ty,
1940         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1941     auto *FnTy =
1942         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1943     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1944     break;
1945   }
1946   case OMPRTL__kmpc_cancel_barrier: {
1947     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1948     // global_tid);
1949     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1950     auto *FnTy =
1951         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1952     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1953     break;
1954   }
1955   case OMPRTL__kmpc_barrier: {
1956     // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1957     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1958     auto *FnTy =
1959         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1960     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1961     break;
1962   }
1963   case OMPRTL__kmpc_for_static_fini: {
1964     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1965     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1966     auto *FnTy =
1967         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1968     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1969     break;
1970   }
1971   case OMPRTL__kmpc_push_num_threads: {
1972     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1973     // kmp_int32 num_threads)
1974     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1975                                 CGM.Int32Ty};
1976     auto *FnTy =
1977         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1978     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1979     break;
1980   }
1981   case OMPRTL__kmpc_serialized_parallel: {
1982     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1983     // global_tid);
1984     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1985     auto *FnTy =
1986         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1987     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1988     break;
1989   }
1990   case OMPRTL__kmpc_end_serialized_parallel: {
1991     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1992     // global_tid);
1993     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1994     auto *FnTy =
1995         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1996     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1997     break;
1998   }
1999   case OMPRTL__kmpc_flush: {
2000     // Build void __kmpc_flush(ident_t *loc);
2001     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
2002     auto *FnTy =
2003         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2004     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
2005     break;
2006   }
2007   case OMPRTL__kmpc_master: {
2008     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
2009     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2010     auto *FnTy =
2011         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2012     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
2013     break;
2014   }
2015   case OMPRTL__kmpc_end_master: {
2016     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
2017     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2018     auto *FnTy =
2019         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2020     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
2021     break;
2022   }
2023   case OMPRTL__kmpc_omp_taskyield: {
2024     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
2025     // int end_part);
2026     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2027     auto *FnTy =
2028         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2029     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
2030     break;
2031   }
2032   case OMPRTL__kmpc_single: {
2033     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
2034     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2035     auto *FnTy =
2036         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2037     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
2038     break;
2039   }
2040   case OMPRTL__kmpc_end_single: {
2041     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
2042     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2043     auto *FnTy =
2044         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2045     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
2046     break;
2047   }
2048   case OMPRTL__kmpc_omp_task_alloc: {
2049     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
2050     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
2051     // kmp_routine_entry_t *task_entry);
2052     assert(KmpRoutineEntryPtrTy != nullptr &&
2053            "Type kmp_routine_entry_t must be created.");
2054     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2055                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
2056     // Return void * and then cast to particular kmp_task_t type.
2057     auto *FnTy =
2058         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2059     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
2060     break;
2061   }
2062   case OMPRTL__kmpc_omp_target_task_alloc: {
2063     // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32 gtid,
2064     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
2065     // kmp_routine_entry_t *task_entry, kmp_int64 device_id);
2066     assert(KmpRoutineEntryPtrTy != nullptr &&
2067            "Type kmp_routine_entry_t must be created.");
2068     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2069                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy,
2070                                 CGM.Int64Ty};
2071     // Return void * and then cast to particular kmp_task_t type.
2072     auto *FnTy =
2073         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2074     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_target_task_alloc");
2075     break;
2076   }
2077   case OMPRTL__kmpc_omp_task: {
2078     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2079     // *new_task);
2080     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2081                                 CGM.VoidPtrTy};
2082     auto *FnTy =
2083         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2084     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
2085     break;
2086   }
2087   case OMPRTL__kmpc_copyprivate: {
2088     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
2089     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
2090     // kmp_int32 didit);
2091     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2092     auto *CpyFnTy =
2093         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
2094     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
2095                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
2096                                 CGM.Int32Ty};
2097     auto *FnTy =
2098         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2099     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
2100     break;
2101   }
2102   case OMPRTL__kmpc_reduce: {
2103     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
2104     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
2105     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
2106     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2107     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
2108                                                /*isVarArg=*/false);
2109     llvm::Type *TypeParams[] = {
2110         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
2111         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
2112         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2113     auto *FnTy =
2114         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2115     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
2116     break;
2117   }
2118   case OMPRTL__kmpc_reduce_nowait: {
2119     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
2120     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
2121     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
2122     // *lck);
2123     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2124     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
2125                                                /*isVarArg=*/false);
2126     llvm::Type *TypeParams[] = {
2127         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
2128         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
2129         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2130     auto *FnTy =
2131         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2132     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
2133     break;
2134   }
2135   case OMPRTL__kmpc_end_reduce: {
2136     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
2137     // kmp_critical_name *lck);
2138     llvm::Type *TypeParams[] = {
2139         getIdentTyPointerTy(), CGM.Int32Ty,
2140         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2141     auto *FnTy =
2142         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2143     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
2144     break;
2145   }
2146   case OMPRTL__kmpc_end_reduce_nowait: {
2147     // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
2148     // kmp_critical_name *lck);
2149     llvm::Type *TypeParams[] = {
2150         getIdentTyPointerTy(), CGM.Int32Ty,
2151         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2152     auto *FnTy =
2153         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2154     RTLFn =
2155         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
2156     break;
2157   }
2158   case OMPRTL__kmpc_omp_task_begin_if0: {
2159     // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2160     // *new_task);
2161     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2162                                 CGM.VoidPtrTy};
2163     auto *FnTy =
2164         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2165     RTLFn =
2166         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
2167     break;
2168   }
2169   case OMPRTL__kmpc_omp_task_complete_if0: {
2170     // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2171     // *new_task);
2172     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2173                                 CGM.VoidPtrTy};
2174     auto *FnTy =
2175         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2176     RTLFn = CGM.CreateRuntimeFunction(FnTy,
2177                                       /*Name=*/"__kmpc_omp_task_complete_if0");
2178     break;
2179   }
2180   case OMPRTL__kmpc_ordered: {
2181     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
2182     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2183     auto *FnTy =
2184         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2185     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
2186     break;
2187   }
2188   case OMPRTL__kmpc_end_ordered: {
2189     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
2190     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2191     auto *FnTy =
2192         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2193     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
2194     break;
2195   }
2196   case OMPRTL__kmpc_omp_taskwait: {
2197     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
2198     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2199     auto *FnTy =
2200         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2201     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
2202     break;
2203   }
2204   case OMPRTL__kmpc_taskgroup: {
2205     // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
2206     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2207     auto *FnTy =
2208         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2209     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
2210     break;
2211   }
2212   case OMPRTL__kmpc_end_taskgroup: {
2213     // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
2214     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2215     auto *FnTy =
2216         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2217     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
2218     break;
2219   }
2220   case OMPRTL__kmpc_push_proc_bind: {
2221     // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
2222     // int proc_bind)
2223     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2224     auto *FnTy =
2225         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2226     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
2227     break;
2228   }
2229   case OMPRTL__kmpc_omp_task_with_deps: {
2230     // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
2231     // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
2232     // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
2233     llvm::Type *TypeParams[] = {
2234         getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
2235         CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
2236     auto *FnTy =
2237         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2238     RTLFn =
2239         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
2240     break;
2241   }
2242   case OMPRTL__kmpc_omp_wait_deps: {
2243     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
2244     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
2245     // kmp_depend_info_t *noalias_dep_list);
2246     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2247                                 CGM.Int32Ty,           CGM.VoidPtrTy,
2248                                 CGM.Int32Ty,           CGM.VoidPtrTy};
2249     auto *FnTy =
2250         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2251     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
2252     break;
2253   }
2254   case OMPRTL__kmpc_cancellationpoint: {
2255     // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
2256     // global_tid, kmp_int32 cncl_kind)
2257     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2258     auto *FnTy =
2259         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2260     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
2261     break;
2262   }
2263   case OMPRTL__kmpc_cancel: {
2264     // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
2265     // kmp_int32 cncl_kind)
2266     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2267     auto *FnTy =
2268         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2269     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
2270     break;
2271   }
2272   case OMPRTL__kmpc_push_num_teams: {
2273     // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid,
2274     // kmp_int32 num_teams, kmp_int32 num_threads)
2275     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2276         CGM.Int32Ty};
2277     auto *FnTy =
2278         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2279     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
2280     break;
2281   }
2282   case OMPRTL__kmpc_fork_teams: {
2283     // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
2284     // microtask, ...);
2285     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2286                                 getKmpc_MicroPointerTy()};
2287     auto *FnTy =
2288         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
2289     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
2290     if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
2291       if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
2292         llvm::LLVMContext &Ctx = F->getContext();
2293         llvm::MDBuilder MDB(Ctx);
2294         // Annotate the callback behavior of the __kmpc_fork_teams:
2295         //  - The callback callee is argument number 2 (microtask).
2296         //  - The first two arguments of the callback callee are unknown (-1).
2297         //  - All variadic arguments to the __kmpc_fork_teams are passed to the
2298         //    callback callee.
2299         F->addMetadata(
2300             llvm::LLVMContext::MD_callback,
2301             *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
2302                                         2, {-1, -1},
2303                                         /* VarArgsArePassed */ true)}));
2304       }
2305     }
2306     break;
2307   }
2308   case OMPRTL__kmpc_taskloop: {
2309     // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
2310     // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
2311     // sched, kmp_uint64 grainsize, void *task_dup);
2312     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2313                                 CGM.IntTy,
2314                                 CGM.VoidPtrTy,
2315                                 CGM.IntTy,
2316                                 CGM.Int64Ty->getPointerTo(),
2317                                 CGM.Int64Ty->getPointerTo(),
2318                                 CGM.Int64Ty,
2319                                 CGM.IntTy,
2320                                 CGM.IntTy,
2321                                 CGM.Int64Ty,
2322                                 CGM.VoidPtrTy};
2323     auto *FnTy =
2324         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2325     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
2326     break;
2327   }
2328   case OMPRTL__kmpc_doacross_init: {
2329     // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
2330     // num_dims, struct kmp_dim *dims);
2331     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2332                                 CGM.Int32Ty,
2333                                 CGM.Int32Ty,
2334                                 CGM.VoidPtrTy};
2335     auto *FnTy =
2336         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2337     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
2338     break;
2339   }
2340   case OMPRTL__kmpc_doacross_fini: {
2341     // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
2342     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2343     auto *FnTy =
2344         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2345     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
2346     break;
2347   }
2348   case OMPRTL__kmpc_doacross_post: {
2349     // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
2350     // *vec);
2351     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2352                                 CGM.Int64Ty->getPointerTo()};
2353     auto *FnTy =
2354         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2355     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
2356     break;
2357   }
2358   case OMPRTL__kmpc_doacross_wait: {
2359     // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
2360     // *vec);
2361     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2362                                 CGM.Int64Ty->getPointerTo()};
2363     auto *FnTy =
2364         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2365     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
2366     break;
2367   }
2368   case OMPRTL__kmpc_task_reduction_init: {
2369     // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
2370     // *data);
2371     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
2372     auto *FnTy =
2373         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2374     RTLFn =
2375         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
2376     break;
2377   }
2378   case OMPRTL__kmpc_task_reduction_get_th_data: {
2379     // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
2380     // *d);
2381     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2382     auto *FnTy =
2383         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2384     RTLFn = CGM.CreateRuntimeFunction(
2385         FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
2386     break;
2387   }
2388   case OMPRTL__kmpc_alloc: {
2389     // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t
2390     // al); omp_allocator_handle_t type is void *.
2391     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy};
2392     auto *FnTy =
2393         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2394     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc");
2395     break;
2396   }
2397   case OMPRTL__kmpc_free: {
2398     // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t
2399     // al); omp_allocator_handle_t type is void *.
2400     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2401     auto *FnTy =
2402         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2403     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free");
2404     break;
2405   }
2406   case OMPRTL__kmpc_push_target_tripcount: {
2407     // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
2408     // size);
2409     llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty};
2410     llvm::FunctionType *FnTy =
2411         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2412     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount");
2413     break;
2414   }
2415   case OMPRTL__tgt_target: {
2416     // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
2417     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2418     // *arg_types);
2419     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2420                                 CGM.VoidPtrTy,
2421                                 CGM.Int32Ty,
2422                                 CGM.VoidPtrPtrTy,
2423                                 CGM.VoidPtrPtrTy,
2424                                 CGM.Int64Ty->getPointerTo(),
2425                                 CGM.Int64Ty->getPointerTo()};
2426     auto *FnTy =
2427         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2428     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
2429     break;
2430   }
2431   case OMPRTL__tgt_target_nowait: {
2432     // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
2433     // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2434     // int64_t *arg_types);
2435     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2436                                 CGM.VoidPtrTy,
2437                                 CGM.Int32Ty,
2438                                 CGM.VoidPtrPtrTy,
2439                                 CGM.VoidPtrPtrTy,
2440                                 CGM.Int64Ty->getPointerTo(),
2441                                 CGM.Int64Ty->getPointerTo()};
2442     auto *FnTy =
2443         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2444     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
2445     break;
2446   }
2447   case OMPRTL__tgt_target_teams: {
2448     // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
2449     // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2450     // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2451     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2452                                 CGM.VoidPtrTy,
2453                                 CGM.Int32Ty,
2454                                 CGM.VoidPtrPtrTy,
2455                                 CGM.VoidPtrPtrTy,
2456                                 CGM.Int64Ty->getPointerTo(),
2457                                 CGM.Int64Ty->getPointerTo(),
2458                                 CGM.Int32Ty,
2459                                 CGM.Int32Ty};
2460     auto *FnTy =
2461         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2462     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
2463     break;
2464   }
2465   case OMPRTL__tgt_target_teams_nowait: {
2466     // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
2467     // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
2468     // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2469     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2470                                 CGM.VoidPtrTy,
2471                                 CGM.Int32Ty,
2472                                 CGM.VoidPtrPtrTy,
2473                                 CGM.VoidPtrPtrTy,
2474                                 CGM.Int64Ty->getPointerTo(),
2475                                 CGM.Int64Ty->getPointerTo(),
2476                                 CGM.Int32Ty,
2477                                 CGM.Int32Ty};
2478     auto *FnTy =
2479         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2480     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
2481     break;
2482   }
2483   case OMPRTL__tgt_register_requires: {
2484     // Build void __tgt_register_requires(int64_t flags);
2485     llvm::Type *TypeParams[] = {CGM.Int64Ty};
2486     auto *FnTy =
2487         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2488     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires");
2489     break;
2490   }
2491   case OMPRTL__tgt_target_data_begin: {
2492     // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
2493     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2494     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2495                                 CGM.Int32Ty,
2496                                 CGM.VoidPtrPtrTy,
2497                                 CGM.VoidPtrPtrTy,
2498                                 CGM.Int64Ty->getPointerTo(),
2499                                 CGM.Int64Ty->getPointerTo()};
2500     auto *FnTy =
2501         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2502     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
2503     break;
2504   }
2505   case OMPRTL__tgt_target_data_begin_nowait: {
2506     // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
2507     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2508     // *arg_types);
2509     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2510                                 CGM.Int32Ty,
2511                                 CGM.VoidPtrPtrTy,
2512                                 CGM.VoidPtrPtrTy,
2513                                 CGM.Int64Ty->getPointerTo(),
2514                                 CGM.Int64Ty->getPointerTo()};
2515     auto *FnTy =
2516         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2517     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
2518     break;
2519   }
2520   case OMPRTL__tgt_target_data_end: {
2521     // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
2522     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2523     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2524                                 CGM.Int32Ty,
2525                                 CGM.VoidPtrPtrTy,
2526                                 CGM.VoidPtrPtrTy,
2527                                 CGM.Int64Ty->getPointerTo(),
2528                                 CGM.Int64Ty->getPointerTo()};
2529     auto *FnTy =
2530         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2531     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
2532     break;
2533   }
2534   case OMPRTL__tgt_target_data_end_nowait: {
2535     // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
2536     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2537     // *arg_types);
2538     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2539                                 CGM.Int32Ty,
2540                                 CGM.VoidPtrPtrTy,
2541                                 CGM.VoidPtrPtrTy,
2542                                 CGM.Int64Ty->getPointerTo(),
2543                                 CGM.Int64Ty->getPointerTo()};
2544     auto *FnTy =
2545         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2546     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
2547     break;
2548   }
2549   case OMPRTL__tgt_target_data_update: {
2550     // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
2551     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2552     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2553                                 CGM.Int32Ty,
2554                                 CGM.VoidPtrPtrTy,
2555                                 CGM.VoidPtrPtrTy,
2556                                 CGM.Int64Ty->getPointerTo(),
2557                                 CGM.Int64Ty->getPointerTo()};
2558     auto *FnTy =
2559         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2560     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
2561     break;
2562   }
2563   case OMPRTL__tgt_target_data_update_nowait: {
2564     // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
2565     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2566     // *arg_types);
2567     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2568                                 CGM.Int32Ty,
2569                                 CGM.VoidPtrPtrTy,
2570                                 CGM.VoidPtrPtrTy,
2571                                 CGM.Int64Ty->getPointerTo(),
2572                                 CGM.Int64Ty->getPointerTo()};
2573     auto *FnTy =
2574         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2575     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
2576     break;
2577   }
2578   case OMPRTL__tgt_mapper_num_components: {
2579     // Build int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
2580     llvm::Type *TypeParams[] = {CGM.VoidPtrTy};
2581     auto *FnTy =
2582         llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false);
2583     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_mapper_num_components");
2584     break;
2585   }
2586   case OMPRTL__tgt_push_mapper_component: {
2587     // Build void __tgt_push_mapper_component(void *rt_mapper_handle, void
2588     // *base, void *begin, int64_t size, int64_t type);
2589     llvm::Type *TypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.VoidPtrTy,
2590                                 CGM.Int64Ty, CGM.Int64Ty};
2591     auto *FnTy =
2592         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2593     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_push_mapper_component");
2594     break;
2595   }
2596   }
2597   assert(RTLFn && "Unable to find OpenMP runtime function");
2598   return RTLFn;
2599 }
2600 
2601 llvm::FunctionCallee
2602 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
2603   assert((IVSize == 32 || IVSize == 64) &&
2604          "IV size is not compatible with the omp runtime");
2605   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
2606                                             : "__kmpc_for_static_init_4u")
2607                                 : (IVSigned ? "__kmpc_for_static_init_8"
2608                                             : "__kmpc_for_static_init_8u");
2609   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2610   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2611   llvm::Type *TypeParams[] = {
2612     getIdentTyPointerTy(),                     // loc
2613     CGM.Int32Ty,                               // tid
2614     CGM.Int32Ty,                               // schedtype
2615     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2616     PtrTy,                                     // p_lower
2617     PtrTy,                                     // p_upper
2618     PtrTy,                                     // p_stride
2619     ITy,                                       // incr
2620     ITy                                        // chunk
2621   };
2622   auto *FnTy =
2623       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2624   return CGM.CreateRuntimeFunction(FnTy, Name);
2625 }
2626 
2627 llvm::FunctionCallee
2628 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
2629   assert((IVSize == 32 || IVSize == 64) &&
2630          "IV size is not compatible with the omp runtime");
2631   StringRef Name =
2632       IVSize == 32
2633           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
2634           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
2635   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2636   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
2637                                CGM.Int32Ty,           // tid
2638                                CGM.Int32Ty,           // schedtype
2639                                ITy,                   // lower
2640                                ITy,                   // upper
2641                                ITy,                   // stride
2642                                ITy                    // chunk
2643   };
2644   auto *FnTy =
2645       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2646   return CGM.CreateRuntimeFunction(FnTy, Name);
2647 }
2648 
2649 llvm::FunctionCallee
2650 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
2651   assert((IVSize == 32 || IVSize == 64) &&
2652          "IV size is not compatible with the omp runtime");
2653   StringRef Name =
2654       IVSize == 32
2655           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
2656           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
2657   llvm::Type *TypeParams[] = {
2658       getIdentTyPointerTy(), // loc
2659       CGM.Int32Ty,           // tid
2660   };
2661   auto *FnTy =
2662       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2663   return CGM.CreateRuntimeFunction(FnTy, Name);
2664 }
2665 
2666 llvm::FunctionCallee
2667 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
2668   assert((IVSize == 32 || IVSize == 64) &&
2669          "IV size is not compatible with the omp runtime");
2670   StringRef Name =
2671       IVSize == 32
2672           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
2673           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
2674   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2675   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2676   llvm::Type *TypeParams[] = {
2677     getIdentTyPointerTy(),                     // loc
2678     CGM.Int32Ty,                               // tid
2679     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2680     PtrTy,                                     // p_lower
2681     PtrTy,                                     // p_upper
2682     PtrTy                                      // p_stride
2683   };
2684   auto *FnTy =
2685       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2686   return CGM.CreateRuntimeFunction(FnTy, Name);
2687 }
2688 
2689 /// Obtain information that uniquely identifies a target entry. This
2690 /// consists of the file and device IDs as well as line number associated with
2691 /// the relevant entry source location.
2692 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
2693                                      unsigned &DeviceID, unsigned &FileID,
2694                                      unsigned &LineNum) {
2695   SourceManager &SM = C.getSourceManager();
2696 
2697   // The loc should be always valid and have a file ID (the user cannot use
2698   // #pragma directives in macros)
2699 
2700   assert(Loc.isValid() && "Source location is expected to be always valid.");
2701 
2702   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
2703   assert(PLoc.isValid() && "Source location is expected to be always valid.");
2704 
2705   llvm::sys::fs::UniqueID ID;
2706   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
2707     SM.getDiagnostics().Report(diag::err_cannot_open_file)
2708         << PLoc.getFilename() << EC.message();
2709 
2710   DeviceID = ID.getDevice();
2711   FileID = ID.getFile();
2712   LineNum = PLoc.getLine();
2713 }
2714 
2715 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
2716   if (CGM.getLangOpts().OpenMPSimd)
2717     return Address::invalid();
2718   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
2719       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
2720   if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
2721               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
2722                HasRequiresUnifiedSharedMemory))) {
2723     SmallString<64> PtrName;
2724     {
2725       llvm::raw_svector_ostream OS(PtrName);
2726       OS << CGM.getMangledName(GlobalDecl(VD));
2727       if (!VD->isExternallyVisible()) {
2728         unsigned DeviceID, FileID, Line;
2729         getTargetEntryUniqueInfo(CGM.getContext(),
2730                                  VD->getCanonicalDecl()->getBeginLoc(),
2731                                  DeviceID, FileID, Line);
2732         OS << llvm::format("_%x", FileID);
2733       }
2734       OS << "_decl_tgt_ref_ptr";
2735     }
2736     llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
2737     if (!Ptr) {
2738       QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
2739       Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
2740                                         PtrName);
2741 
2742       auto *GV = cast<llvm::GlobalVariable>(Ptr);
2743       GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
2744 
2745       if (!CGM.getLangOpts().OpenMPIsDevice)
2746         GV->setInitializer(CGM.GetAddrOfGlobal(VD));
2747       registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
2748     }
2749     return Address(Ptr, CGM.getContext().getDeclAlign(VD));
2750   }
2751   return Address::invalid();
2752 }
2753 
2754 llvm::Constant *
2755 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
2756   assert(!CGM.getLangOpts().OpenMPUseTLS ||
2757          !CGM.getContext().getTargetInfo().isTLSSupported());
2758   // Lookup the entry, lazily creating it if necessary.
2759   std::string Suffix = getName({"cache", ""});
2760   return getOrCreateInternalVariable(
2761       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
2762 }
2763 
2764 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
2765                                                 const VarDecl *VD,
2766                                                 Address VDAddr,
2767                                                 SourceLocation Loc) {
2768   if (CGM.getLangOpts().OpenMPUseTLS &&
2769       CGM.getContext().getTargetInfo().isTLSSupported())
2770     return VDAddr;
2771 
2772   llvm::Type *VarTy = VDAddr.getElementType();
2773   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2774                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2775                                                        CGM.Int8PtrTy),
2776                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
2777                          getOrCreateThreadPrivateCache(VD)};
2778   return Address(CGF.EmitRuntimeCall(
2779       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2780                  VDAddr.getAlignment());
2781 }
2782 
2783 void CGOpenMPRuntime::emitThreadPrivateVarInit(
2784     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
2785     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
2786   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
2787   // library.
2788   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
2789   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
2790                       OMPLoc);
2791   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
2792   // to register constructor/destructor for variable.
2793   llvm::Value *Args[] = {
2794       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
2795       Ctor, CopyCtor, Dtor};
2796   CGF.EmitRuntimeCall(
2797       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
2798 }
2799 
2800 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
2801     const VarDecl *VD, Address VDAddr, SourceLocation Loc,
2802     bool PerformInit, CodeGenFunction *CGF) {
2803   if (CGM.getLangOpts().OpenMPUseTLS &&
2804       CGM.getContext().getTargetInfo().isTLSSupported())
2805     return nullptr;
2806 
2807   VD = VD->getDefinition(CGM.getContext());
2808   if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
2809     QualType ASTTy = VD->getType();
2810 
2811     llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
2812     const Expr *Init = VD->getAnyInitializer();
2813     if (CGM.getLangOpts().CPlusPlus && PerformInit) {
2814       // Generate function that re-emits the declaration's initializer into the
2815       // threadprivate copy of the variable VD
2816       CodeGenFunction CtorCGF(CGM);
2817       FunctionArgList Args;
2818       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
2819                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
2820                             ImplicitParamDecl::Other);
2821       Args.push_back(&Dst);
2822 
2823       const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
2824           CGM.getContext().VoidPtrTy, Args);
2825       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2826       std::string Name = getName({"__kmpc_global_ctor_", ""});
2827       llvm::Function *Fn =
2828           CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
2829       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
2830                             Args, Loc, Loc);
2831       llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
2832           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
2833           CGM.getContext().VoidPtrTy, Dst.getLocation());
2834       Address Arg = Address(ArgVal, VDAddr.getAlignment());
2835       Arg = CtorCGF.Builder.CreateElementBitCast(
2836           Arg, CtorCGF.ConvertTypeForMem(ASTTy));
2837       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
2838                                /*IsInitializer=*/true);
2839       ArgVal = CtorCGF.EmitLoadOfScalar(
2840           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
2841           CGM.getContext().VoidPtrTy, Dst.getLocation());
2842       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
2843       CtorCGF.FinishFunction();
2844       Ctor = Fn;
2845     }
2846     if (VD->getType().isDestructedType() != QualType::DK_none) {
2847       // Generate function that emits destructor call for the threadprivate copy
2848       // of the variable VD
2849       CodeGenFunction DtorCGF(CGM);
2850       FunctionArgList Args;
2851       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
2852                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
2853                             ImplicitParamDecl::Other);
2854       Args.push_back(&Dst);
2855 
2856       const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
2857           CGM.getContext().VoidTy, Args);
2858       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2859       std::string Name = getName({"__kmpc_global_dtor_", ""});
2860       llvm::Function *Fn =
2861           CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
2862       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
2863       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
2864                             Loc, Loc);
2865       // Create a scope with an artificial location for the body of this function.
2866       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
2867       llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
2868           DtorCGF.GetAddrOfLocalVar(&Dst),
2869           /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
2870       DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
2871                           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
2872                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
2873       DtorCGF.FinishFunction();
2874       Dtor = Fn;
2875     }
2876     // Do not emit init function if it is not required.
2877     if (!Ctor && !Dtor)
2878       return nullptr;
2879 
2880     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2881     auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
2882                                                /*isVarArg=*/false)
2883                            ->getPointerTo();
2884     // Copying constructor for the threadprivate variable.
2885     // Must be NULL - reserved by runtime, but currently it requires that this
2886     // parameter is always NULL. Otherwise it fires assertion.
2887     CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
2888     if (Ctor == nullptr) {
2889       auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
2890                                              /*isVarArg=*/false)
2891                          ->getPointerTo();
2892       Ctor = llvm::Constant::getNullValue(CtorTy);
2893     }
2894     if (Dtor == nullptr) {
2895       auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
2896                                              /*isVarArg=*/false)
2897                          ->getPointerTo();
2898       Dtor = llvm::Constant::getNullValue(DtorTy);
2899     }
2900     if (!CGF) {
2901       auto *InitFunctionTy =
2902           llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
2903       std::string Name = getName({"__omp_threadprivate_init_", ""});
2904       llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
2905           InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
2906       CodeGenFunction InitCGF(CGM);
2907       FunctionArgList ArgList;
2908       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
2909                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
2910                             Loc, Loc);
2911       emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
2912       InitCGF.FinishFunction();
2913       return InitFunction;
2914     }
2915     emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
2916   }
2917   return nullptr;
2918 }
2919 
2920 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
2921                                                      llvm::GlobalVariable *Addr,
2922                                                      bool PerformInit) {
2923   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
2924       !CGM.getLangOpts().OpenMPIsDevice)
2925     return false;
2926   Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
2927       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
2928   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
2929       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
2930        HasRequiresUnifiedSharedMemory))
2931     return CGM.getLangOpts().OpenMPIsDevice;
2932   VD = VD->getDefinition(CGM.getContext());
2933   if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
2934     return CGM.getLangOpts().OpenMPIsDevice;
2935 
2936   QualType ASTTy = VD->getType();
2937 
2938   SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
2939   // Produce the unique prefix to identify the new target regions. We use
2940   // the source location of the variable declaration which we know to not
2941   // conflict with any target region.
2942   unsigned DeviceID;
2943   unsigned FileID;
2944   unsigned Line;
2945   getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
2946   SmallString<128> Buffer, Out;
2947   {
2948     llvm::raw_svector_ostream OS(Buffer);
2949     OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
2950        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
2951   }
2952 
2953   const Expr *Init = VD->getAnyInitializer();
2954   if (CGM.getLangOpts().CPlusPlus && PerformInit) {
2955     llvm::Constant *Ctor;
2956     llvm::Constant *ID;
2957     if (CGM.getLangOpts().OpenMPIsDevice) {
2958       // Generate function that re-emits the declaration's initializer into
2959       // the threadprivate copy of the variable VD
2960       CodeGenFunction CtorCGF(CGM);
2961 
2962       const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
2963       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2964       llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
2965           FTy, Twine(Buffer, "_ctor"), FI, Loc);
2966       auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
2967       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
2968                             FunctionArgList(), Loc, Loc);
2969       auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
2970       CtorCGF.EmitAnyExprToMem(Init,
2971                                Address(Addr, CGM.getContext().getDeclAlign(VD)),
2972                                Init->getType().getQualifiers(),
2973                                /*IsInitializer=*/true);
2974       CtorCGF.FinishFunction();
2975       Ctor = Fn;
2976       ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
2977       CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
2978     } else {
2979       Ctor = new llvm::GlobalVariable(
2980           CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
2981           llvm::GlobalValue::PrivateLinkage,
2982           llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
2983       ID = Ctor;
2984     }
2985 
2986     // Register the information for the entry associated with the constructor.
2987     Out.clear();
2988     OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
2989         DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
2990         ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
2991   }
2992   if (VD->getType().isDestructedType() != QualType::DK_none) {
2993     llvm::Constant *Dtor;
2994     llvm::Constant *ID;
2995     if (CGM.getLangOpts().OpenMPIsDevice) {
2996       // Generate function that emits destructor call for the threadprivate
2997       // copy of the variable VD
2998       CodeGenFunction DtorCGF(CGM);
2999 
3000       const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
3001       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
3002       llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
3003           FTy, Twine(Buffer, "_dtor"), FI, Loc);
3004       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
3005       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
3006                             FunctionArgList(), Loc, Loc);
3007       // Create a scope with an artificial location for the body of this
3008       // function.
3009       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
3010       DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
3011                           ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
3012                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
3013       DtorCGF.FinishFunction();
3014       Dtor = Fn;
3015       ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
3016       CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
3017     } else {
3018       Dtor = new llvm::GlobalVariable(
3019           CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
3020           llvm::GlobalValue::PrivateLinkage,
3021           llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
3022       ID = Dtor;
3023     }
3024     // Register the information for the entry associated with the destructor.
3025     Out.clear();
3026     OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
3027         DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
3028         ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
3029   }
3030   return CGM.getLangOpts().OpenMPIsDevice;
3031 }
3032 
3033 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
3034                                                           QualType VarType,
3035                                                           StringRef Name) {
3036   std::string Suffix = getName({"artificial", ""});
3037   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
3038   llvm::Value *GAddr =
3039       getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
3040   if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
3041       CGM.getTarget().isTLSSupported()) {
3042     cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
3043     return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
3044   }
3045   std::string CacheSuffix = getName({"cache", ""});
3046   llvm::Value *Args[] = {
3047       emitUpdateLocation(CGF, SourceLocation()),
3048       getThreadID(CGF, SourceLocation()),
3049       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
3050       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
3051                                 /*isSigned=*/false),
3052       getOrCreateInternalVariable(
3053           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
3054   return Address(
3055       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3056           CGF.EmitRuntimeCall(
3057               createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
3058           VarLVType->getPointerTo(/*AddrSpace=*/0)),
3059       CGM.getContext().getTypeAlignInChars(VarType));
3060 }
3061 
3062 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
3063                                    const RegionCodeGenTy &ThenGen,
3064                                    const RegionCodeGenTy &ElseGen) {
3065   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
3066 
3067   // If the condition constant folds and can be elided, try to avoid emitting
3068   // the condition and the dead arm of the if/else.
3069   bool CondConstant;
3070   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
3071     if (CondConstant)
3072       ThenGen(CGF);
3073     else
3074       ElseGen(CGF);
3075     return;
3076   }
3077 
3078   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
3079   // emit the conditional branch.
3080   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
3081   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
3082   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
3083   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
3084 
3085   // Emit the 'then' code.
3086   CGF.EmitBlock(ThenBlock);
3087   ThenGen(CGF);
3088   CGF.EmitBranch(ContBlock);
3089   // Emit the 'else' code if present.
3090   // There is no need to emit line number for unconditional branch.
3091   (void)ApplyDebugLocation::CreateEmpty(CGF);
3092   CGF.EmitBlock(ElseBlock);
3093   ElseGen(CGF);
3094   // There is no need to emit line number for unconditional branch.
3095   (void)ApplyDebugLocation::CreateEmpty(CGF);
3096   CGF.EmitBranch(ContBlock);
3097   // Emit the continuation block for code after the if.
3098   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
3099 }
3100 
3101 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
3102                                        llvm::Function *OutlinedFn,
3103                                        ArrayRef<llvm::Value *> CapturedVars,
3104                                        const Expr *IfCond) {
3105   if (!CGF.HaveInsertPoint())
3106     return;
3107   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
3108   auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
3109                                                      PrePostActionTy &) {
3110     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
3111     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
3112     llvm::Value *Args[] = {
3113         RTLoc,
3114         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
3115         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
3116     llvm::SmallVector<llvm::Value *, 16> RealArgs;
3117     RealArgs.append(std::begin(Args), std::end(Args));
3118     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
3119 
3120     llvm::FunctionCallee RTLFn =
3121         RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
3122     CGF.EmitRuntimeCall(RTLFn, RealArgs);
3123   };
3124   auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
3125                                                           PrePostActionTy &) {
3126     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
3127     llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
3128     // Build calls:
3129     // __kmpc_serialized_parallel(&Loc, GTid);
3130     llvm::Value *Args[] = {RTLoc, ThreadID};
3131     CGF.EmitRuntimeCall(
3132         RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
3133 
3134     // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
3135     Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
3136     Address ZeroAddrBound =
3137         CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
3138                                          /*Name=*/".bound.zero.addr");
3139     CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
3140     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
3141     // ThreadId for serialized parallels is 0.
3142     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
3143     OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
3144     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
3145     RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
3146 
3147     // __kmpc_end_serialized_parallel(&Loc, GTid);
3148     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
3149     CGF.EmitRuntimeCall(
3150         RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
3151         EndArgs);
3152   };
3153   if (IfCond) {
3154     emitIfClause(CGF, IfCond, ThenGen, ElseGen);
3155   } else {
3156     RegionCodeGenTy ThenRCG(ThenGen);
3157     ThenRCG(CGF);
3158   }
3159 }
3160 
3161 // If we're inside an (outlined) parallel region, use the region info's
3162 // thread-ID variable (it is passed in a first argument of the outlined function
3163 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
3164 // regular serial code region, get thread ID by calling kmp_int32
3165 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
3166 // return the address of that temp.
3167 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
3168                                              SourceLocation Loc) {
3169   if (auto *OMPRegionInfo =
3170           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3171     if (OMPRegionInfo->getThreadIDVariable())
3172       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
3173 
3174   llvm::Value *ThreadID = getThreadID(CGF, Loc);
3175   QualType Int32Ty =
3176       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
3177   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
3178   CGF.EmitStoreOfScalar(ThreadID,
3179                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
3180 
3181   return ThreadIDTemp;
3182 }
3183 
3184 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
3185     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
3186   SmallString<256> Buffer;
3187   llvm::raw_svector_ostream Out(Buffer);
3188   Out << Name;
3189   StringRef RuntimeName = Out.str();
3190   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
3191   if (Elem.second) {
3192     assert(Elem.second->getType()->getPointerElementType() == Ty &&
3193            "OMP internal variable has different type than requested");
3194     return &*Elem.second;
3195   }
3196 
3197   return Elem.second = new llvm::GlobalVariable(
3198              CGM.getModule(), Ty, /*IsConstant*/ false,
3199              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
3200              Elem.first(), /*InsertBefore=*/nullptr,
3201              llvm::GlobalValue::NotThreadLocal, AddressSpace);
3202 }
3203 
3204 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
3205   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
3206   std::string Name = getName({Prefix, "var"});
3207   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
3208 }
3209 
3210 namespace {
3211 /// Common pre(post)-action for different OpenMP constructs.
3212 class CommonActionTy final : public PrePostActionTy {
3213   llvm::FunctionCallee EnterCallee;
3214   ArrayRef<llvm::Value *> EnterArgs;
3215   llvm::FunctionCallee ExitCallee;
3216   ArrayRef<llvm::Value *> ExitArgs;
3217   bool Conditional;
3218   llvm::BasicBlock *ContBlock = nullptr;
3219 
3220 public:
3221   CommonActionTy(llvm::FunctionCallee EnterCallee,
3222                  ArrayRef<llvm::Value *> EnterArgs,
3223                  llvm::FunctionCallee ExitCallee,
3224                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
3225       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
3226         ExitArgs(ExitArgs), Conditional(Conditional) {}
3227   void Enter(CodeGenFunction &CGF) override {
3228     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
3229     if (Conditional) {
3230       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
3231       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
3232       ContBlock = CGF.createBasicBlock("omp_if.end");
3233       // Generate the branch (If-stmt)
3234       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
3235       CGF.EmitBlock(ThenBlock);
3236     }
3237   }
3238   void Done(CodeGenFunction &CGF) {
3239     // Emit the rest of blocks/branches
3240     CGF.EmitBranch(ContBlock);
3241     CGF.EmitBlock(ContBlock, true);
3242   }
3243   void Exit(CodeGenFunction &CGF) override {
3244     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
3245   }
3246 };
3247 } // anonymous namespace
3248 
3249 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
3250                                          StringRef CriticalName,
3251                                          const RegionCodeGenTy &CriticalOpGen,
3252                                          SourceLocation Loc, const Expr *Hint) {
3253   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
3254   // CriticalOpGen();
3255   // __kmpc_end_critical(ident_t *, gtid, Lock);
3256   // Prepare arguments and build a call to __kmpc_critical
3257   if (!CGF.HaveInsertPoint())
3258     return;
3259   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3260                          getCriticalRegionLock(CriticalName)};
3261   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
3262                                                 std::end(Args));
3263   if (Hint) {
3264     EnterArgs.push_back(CGF.Builder.CreateIntCast(
3265         CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
3266   }
3267   CommonActionTy Action(
3268       createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
3269                                  : OMPRTL__kmpc_critical),
3270       EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
3271   CriticalOpGen.setAction(Action);
3272   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
3273 }
3274 
3275 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
3276                                        const RegionCodeGenTy &MasterOpGen,
3277                                        SourceLocation Loc) {
3278   if (!CGF.HaveInsertPoint())
3279     return;
3280   // if(__kmpc_master(ident_t *, gtid)) {
3281   //   MasterOpGen();
3282   //   __kmpc_end_master(ident_t *, gtid);
3283   // }
3284   // Prepare arguments and build a call to __kmpc_master
3285   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3286   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
3287                         createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
3288                         /*Conditional=*/true);
3289   MasterOpGen.setAction(Action);
3290   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
3291   Action.Done(CGF);
3292 }
3293 
3294 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
3295                                         SourceLocation Loc) {
3296   if (!CGF.HaveInsertPoint())
3297     return;
3298   llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
3299   if (OMPBuilder) {
3300     OMPBuilder->CreateTaskyield(CGF.Builder);
3301   } else {
3302     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
3303     llvm::Value *Args[] = {
3304         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3305         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
3306     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield),
3307                         Args);
3308   }
3309 
3310   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3311     Region->emitUntiedSwitch(CGF);
3312 }
3313 
3314 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
3315                                           const RegionCodeGenTy &TaskgroupOpGen,
3316                                           SourceLocation Loc) {
3317   if (!CGF.HaveInsertPoint())
3318     return;
3319   // __kmpc_taskgroup(ident_t *, gtid);
3320   // TaskgroupOpGen();
3321   // __kmpc_end_taskgroup(ident_t *, gtid);
3322   // Prepare arguments and build a call to __kmpc_taskgroup
3323   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3324   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
3325                         createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
3326                         Args);
3327   TaskgroupOpGen.setAction(Action);
3328   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
3329 }
3330 
3331 /// Given an array of pointers to variables, project the address of a
3332 /// given variable.
3333 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
3334                                       unsigned Index, const VarDecl *Var) {
3335   // Pull out the pointer to the variable.
3336   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
3337   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
3338 
3339   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
3340   Addr = CGF.Builder.CreateElementBitCast(
3341       Addr, CGF.ConvertTypeForMem(Var->getType()));
3342   return Addr;
3343 }
3344 
3345 static llvm::Value *emitCopyprivateCopyFunction(
3346     CodeGenModule &CGM, llvm::Type *ArgsType,
3347     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
3348     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
3349     SourceLocation Loc) {
3350   ASTContext &C = CGM.getContext();
3351   // void copy_func(void *LHSArg, void *RHSArg);
3352   FunctionArgList Args;
3353   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
3354                            ImplicitParamDecl::Other);
3355   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
3356                            ImplicitParamDecl::Other);
3357   Args.push_back(&LHSArg);
3358   Args.push_back(&RHSArg);
3359   const auto &CGFI =
3360       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3361   std::string Name =
3362       CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
3363   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
3364                                     llvm::GlobalValue::InternalLinkage, Name,
3365                                     &CGM.getModule());
3366   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
3367   Fn->setDoesNotRecurse();
3368   CodeGenFunction CGF(CGM);
3369   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
3370   // Dest = (void*[n])(LHSArg);
3371   // Src = (void*[n])(RHSArg);
3372   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3373       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
3374       ArgsType), CGF.getPointerAlign());
3375   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3376       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
3377       ArgsType), CGF.getPointerAlign());
3378   // *(Type0*)Dst[0] = *(Type0*)Src[0];
3379   // *(Type1*)Dst[1] = *(Type1*)Src[1];
3380   // ...
3381   // *(Typen*)Dst[n] = *(Typen*)Src[n];
3382   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
3383     const auto *DestVar =
3384         cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
3385     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
3386 
3387     const auto *SrcVar =
3388         cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
3389     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
3390 
3391     const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
3392     QualType Type = VD->getType();
3393     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
3394   }
3395   CGF.FinishFunction();
3396   return Fn;
3397 }
3398 
3399 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
3400                                        const RegionCodeGenTy &SingleOpGen,
3401                                        SourceLocation Loc,
3402                                        ArrayRef<const Expr *> CopyprivateVars,
3403                                        ArrayRef<const Expr *> SrcExprs,
3404                                        ArrayRef<const Expr *> DstExprs,
3405                                        ArrayRef<const Expr *> AssignmentOps) {
3406   if (!CGF.HaveInsertPoint())
3407     return;
3408   assert(CopyprivateVars.size() == SrcExprs.size() &&
3409          CopyprivateVars.size() == DstExprs.size() &&
3410          CopyprivateVars.size() == AssignmentOps.size());
3411   ASTContext &C = CGM.getContext();
3412   // int32 did_it = 0;
3413   // if(__kmpc_single(ident_t *, gtid)) {
3414   //   SingleOpGen();
3415   //   __kmpc_end_single(ident_t *, gtid);
3416   //   did_it = 1;
3417   // }
3418   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
3419   // <copy_func>, did_it);
3420 
3421   Address DidIt = Address::invalid();
3422   if (!CopyprivateVars.empty()) {
3423     // int32 did_it = 0;
3424     QualType KmpInt32Ty =
3425         C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3426     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
3427     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
3428   }
3429   // Prepare arguments and build a call to __kmpc_single
3430   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3431   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
3432                         createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
3433                         /*Conditional=*/true);
3434   SingleOpGen.setAction(Action);
3435   emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
3436   if (DidIt.isValid()) {
3437     // did_it = 1;
3438     CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
3439   }
3440   Action.Done(CGF);
3441   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
3442   // <copy_func>, did_it);
3443   if (DidIt.isValid()) {
3444     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
3445     QualType CopyprivateArrayTy = C.getConstantArrayType(
3446         C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
3447         /*IndexTypeQuals=*/0);
3448     // Create a list of all private variables for copyprivate.
3449     Address CopyprivateList =
3450         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
3451     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
3452       Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
3453       CGF.Builder.CreateStore(
3454           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3455               CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
3456               CGF.VoidPtrTy),
3457           Elem);
3458     }
3459     // Build function that copies private values from single region to all other
3460     // threads in the corresponding parallel region.
3461     llvm::Value *CpyFn = emitCopyprivateCopyFunction(
3462         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
3463         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
3464     llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
3465     Address CL =
3466       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
3467                                                       CGF.VoidPtrTy);
3468     llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
3469     llvm::Value *Args[] = {
3470         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
3471         getThreadID(CGF, Loc),        // i32 <gtid>
3472         BufSize,                      // size_t <buf_size>
3473         CL.getPointer(),              // void *<copyprivate list>
3474         CpyFn,                        // void (*) (void *, void *) <copy_func>
3475         DidItVal                      // i32 did_it
3476     };
3477     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
3478   }
3479 }
3480 
3481 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
3482                                         const RegionCodeGenTy &OrderedOpGen,
3483                                         SourceLocation Loc, bool IsThreads) {
3484   if (!CGF.HaveInsertPoint())
3485     return;
3486   // __kmpc_ordered(ident_t *, gtid);
3487   // OrderedOpGen();
3488   // __kmpc_end_ordered(ident_t *, gtid);
3489   // Prepare arguments and build a call to __kmpc_ordered
3490   if (IsThreads) {
3491     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3492     CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
3493                           createRuntimeFunction(OMPRTL__kmpc_end_ordered),
3494                           Args);
3495     OrderedOpGen.setAction(Action);
3496     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3497     return;
3498   }
3499   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3500 }
3501 
3502 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
3503   unsigned Flags;
3504   if (Kind == OMPD_for)
3505     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
3506   else if (Kind == OMPD_sections)
3507     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
3508   else if (Kind == OMPD_single)
3509     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
3510   else if (Kind == OMPD_barrier)
3511     Flags = OMP_IDENT_BARRIER_EXPL;
3512   else
3513     Flags = OMP_IDENT_BARRIER_IMPL;
3514   return Flags;
3515 }
3516 
3517 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
3518     CodeGenFunction &CGF, const OMPLoopDirective &S,
3519     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
3520   // Check if the loop directive is actually a doacross loop directive. In this
3521   // case choose static, 1 schedule.
3522   if (llvm::any_of(
3523           S.getClausesOfKind<OMPOrderedClause>(),
3524           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
3525     ScheduleKind = OMPC_SCHEDULE_static;
3526     // Chunk size is 1 in this case.
3527     llvm::APInt ChunkSize(32, 1);
3528     ChunkExpr = IntegerLiteral::Create(
3529         CGF.getContext(), ChunkSize,
3530         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
3531         SourceLocation());
3532   }
3533 }
3534 
3535 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
3536                                       OpenMPDirectiveKind Kind, bool EmitChecks,
3537                                       bool ForceSimpleCall) {
3538   // Check if we should use the OMPBuilder
3539   auto *OMPRegionInfo =
3540       dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
3541   llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
3542   if (OMPBuilder) {
3543     CGF.Builder.restoreIP(OMPBuilder->CreateBarrier(
3544         CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
3545     return;
3546   }
3547 
3548   if (!CGF.HaveInsertPoint())
3549     return;
3550   // Build call __kmpc_cancel_barrier(loc, thread_id);
3551   // Build call __kmpc_barrier(loc, thread_id);
3552   unsigned Flags = getDefaultFlagsForBarriers(Kind);
3553   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
3554   // thread_id);
3555   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
3556                          getThreadID(CGF, Loc)};
3557   if (OMPRegionInfo) {
3558     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
3559       llvm::Value *Result = CGF.EmitRuntimeCall(
3560           createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
3561       if (EmitChecks) {
3562         // if (__kmpc_cancel_barrier()) {
3563         //   exit from construct;
3564         // }
3565         llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
3566         llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
3567         llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
3568         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
3569         CGF.EmitBlock(ExitBB);
3570         //   exit from construct;
3571         CodeGenFunction::JumpDest CancelDestination =
3572             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
3573         CGF.EmitBranchThroughCleanup(CancelDestination);
3574         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
3575       }
3576       return;
3577     }
3578   }
3579   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
3580 }
3581 
3582 /// Map the OpenMP loop schedule to the runtime enumeration.
3583 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
3584                                           bool Chunked, bool Ordered) {
3585   switch (ScheduleKind) {
3586   case OMPC_SCHEDULE_static:
3587     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
3588                    : (Ordered ? OMP_ord_static : OMP_sch_static);
3589   case OMPC_SCHEDULE_dynamic:
3590     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
3591   case OMPC_SCHEDULE_guided:
3592     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
3593   case OMPC_SCHEDULE_runtime:
3594     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
3595   case OMPC_SCHEDULE_auto:
3596     return Ordered ? OMP_ord_auto : OMP_sch_auto;
3597   case OMPC_SCHEDULE_unknown:
3598     assert(!Chunked && "chunk was specified but schedule kind not known");
3599     return Ordered ? OMP_ord_static : OMP_sch_static;
3600   }
3601   llvm_unreachable("Unexpected runtime schedule");
3602 }
3603 
3604 /// Map the OpenMP distribute schedule to the runtime enumeration.
3605 static OpenMPSchedType
3606 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
3607   // only static is allowed for dist_schedule
3608   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
3609 }
3610 
3611 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
3612                                          bool Chunked) const {
3613   OpenMPSchedType Schedule =
3614       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3615   return Schedule == OMP_sch_static;
3616 }
3617 
3618 bool CGOpenMPRuntime::isStaticNonchunked(
3619     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3620   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3621   return Schedule == OMP_dist_sch_static;
3622 }
3623 
3624 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
3625                                       bool Chunked) const {
3626   OpenMPSchedType Schedule =
3627       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3628   return Schedule == OMP_sch_static_chunked;
3629 }
3630 
3631 bool CGOpenMPRuntime::isStaticChunked(
3632     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3633   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3634   return Schedule == OMP_dist_sch_static_chunked;
3635 }
3636 
3637 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
3638   OpenMPSchedType Schedule =
3639       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
3640   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
3641   return Schedule != OMP_sch_static;
3642 }
3643 
3644 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
3645                                   OpenMPScheduleClauseModifier M1,
3646                                   OpenMPScheduleClauseModifier M2) {
3647   int Modifier = 0;
3648   switch (M1) {
3649   case OMPC_SCHEDULE_MODIFIER_monotonic:
3650     Modifier = OMP_sch_modifier_monotonic;
3651     break;
3652   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3653     Modifier = OMP_sch_modifier_nonmonotonic;
3654     break;
3655   case OMPC_SCHEDULE_MODIFIER_simd:
3656     if (Schedule == OMP_sch_static_chunked)
3657       Schedule = OMP_sch_static_balanced_chunked;
3658     break;
3659   case OMPC_SCHEDULE_MODIFIER_last:
3660   case OMPC_SCHEDULE_MODIFIER_unknown:
3661     break;
3662   }
3663   switch (M2) {
3664   case OMPC_SCHEDULE_MODIFIER_monotonic:
3665     Modifier = OMP_sch_modifier_monotonic;
3666     break;
3667   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3668     Modifier = OMP_sch_modifier_nonmonotonic;
3669     break;
3670   case OMPC_SCHEDULE_MODIFIER_simd:
3671     if (Schedule == OMP_sch_static_chunked)
3672       Schedule = OMP_sch_static_balanced_chunked;
3673     break;
3674   case OMPC_SCHEDULE_MODIFIER_last:
3675   case OMPC_SCHEDULE_MODIFIER_unknown:
3676     break;
3677   }
3678   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
3679   // If the static schedule kind is specified or if the ordered clause is
3680   // specified, and if the nonmonotonic modifier is not specified, the effect is
3681   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
3682   // modifier is specified, the effect is as if the nonmonotonic modifier is
3683   // specified.
3684   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
3685     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
3686           Schedule == OMP_sch_static_balanced_chunked ||
3687           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
3688           Schedule == OMP_dist_sch_static_chunked ||
3689           Schedule == OMP_dist_sch_static))
3690       Modifier = OMP_sch_modifier_nonmonotonic;
3691   }
3692   return Schedule | Modifier;
3693 }
3694 
3695 void CGOpenMPRuntime::emitForDispatchInit(
3696     CodeGenFunction &CGF, SourceLocation Loc,
3697     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
3698     bool Ordered, const DispatchRTInput &DispatchValues) {
3699   if (!CGF.HaveInsertPoint())
3700     return;
3701   OpenMPSchedType Schedule = getRuntimeSchedule(
3702       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
3703   assert(Ordered ||
3704          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
3705           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
3706           Schedule != OMP_sch_static_balanced_chunked));
3707   // Call __kmpc_dispatch_init(
3708   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
3709   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
3710   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
3711 
3712   // If the Chunk was not specified in the clause - use default value 1.
3713   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
3714                                             : CGF.Builder.getIntN(IVSize, 1);
3715   llvm::Value *Args[] = {
3716       emitUpdateLocation(CGF, Loc),
3717       getThreadID(CGF, Loc),
3718       CGF.Builder.getInt32(addMonoNonMonoModifier(
3719           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
3720       DispatchValues.LB,                                     // Lower
3721       DispatchValues.UB,                                     // Upper
3722       CGF.Builder.getIntN(IVSize, 1),                        // Stride
3723       Chunk                                                  // Chunk
3724   };
3725   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
3726 }
3727 
3728 static void emitForStaticInitCall(
3729     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
3730     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
3731     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
3732     const CGOpenMPRuntime::StaticRTInput &Values) {
3733   if (!CGF.HaveInsertPoint())
3734     return;
3735 
3736   assert(!Values.Ordered);
3737   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
3738          Schedule == OMP_sch_static_balanced_chunked ||
3739          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
3740          Schedule == OMP_dist_sch_static ||
3741          Schedule == OMP_dist_sch_static_chunked);
3742 
3743   // Call __kmpc_for_static_init(
3744   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
3745   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
3746   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
3747   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
3748   llvm::Value *Chunk = Values.Chunk;
3749   if (Chunk == nullptr) {
3750     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
3751             Schedule == OMP_dist_sch_static) &&
3752            "expected static non-chunked schedule");
3753     // If the Chunk was not specified in the clause - use default value 1.
3754     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
3755   } else {
3756     assert((Schedule == OMP_sch_static_chunked ||
3757             Schedule == OMP_sch_static_balanced_chunked ||
3758             Schedule == OMP_ord_static_chunked ||
3759             Schedule == OMP_dist_sch_static_chunked) &&
3760            "expected static chunked schedule");
3761   }
3762   llvm::Value *Args[] = {
3763       UpdateLocation,
3764       ThreadId,
3765       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
3766                                                   M2)), // Schedule type
3767       Values.IL.getPointer(),                           // &isLastIter
3768       Values.LB.getPointer(),                           // &LB
3769       Values.UB.getPointer(),                           // &UB
3770       Values.ST.getPointer(),                           // &Stride
3771       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
3772       Chunk                                             // Chunk
3773   };
3774   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
3775 }
3776 
3777 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
3778                                         SourceLocation Loc,
3779                                         OpenMPDirectiveKind DKind,
3780                                         const OpenMPScheduleTy &ScheduleKind,
3781                                         const StaticRTInput &Values) {
3782   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
3783       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
3784   assert(isOpenMPWorksharingDirective(DKind) &&
3785          "Expected loop-based or sections-based directive.");
3786   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
3787                                              isOpenMPLoopDirective(DKind)
3788                                                  ? OMP_IDENT_WORK_LOOP
3789                                                  : OMP_IDENT_WORK_SECTIONS);
3790   llvm::Value *ThreadId = getThreadID(CGF, Loc);
3791   llvm::FunctionCallee StaticInitFunction =
3792       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3793   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
3794   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3795                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
3796 }
3797 
3798 void CGOpenMPRuntime::emitDistributeStaticInit(
3799     CodeGenFunction &CGF, SourceLocation Loc,
3800     OpenMPDistScheduleClauseKind SchedKind,
3801     const CGOpenMPRuntime::StaticRTInput &Values) {
3802   OpenMPSchedType ScheduleNum =
3803       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
3804   llvm::Value *UpdatedLocation =
3805       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
3806   llvm::Value *ThreadId = getThreadID(CGF, Loc);
3807   llvm::FunctionCallee StaticInitFunction =
3808       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3809   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3810                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
3811                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
3812 }
3813 
3814 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
3815                                           SourceLocation Loc,
3816                                           OpenMPDirectiveKind DKind) {
3817   if (!CGF.HaveInsertPoint())
3818     return;
3819   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
3820   llvm::Value *Args[] = {
3821       emitUpdateLocation(CGF, Loc,
3822                          isOpenMPDistributeDirective(DKind)
3823                              ? OMP_IDENT_WORK_DISTRIBUTE
3824                              : isOpenMPLoopDirective(DKind)
3825                                    ? OMP_IDENT_WORK_LOOP
3826                                    : OMP_IDENT_WORK_SECTIONS),
3827       getThreadID(CGF, Loc)};
3828   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
3829   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
3830                       Args);
3831 }
3832 
3833 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
3834                                                  SourceLocation Loc,
3835                                                  unsigned IVSize,
3836                                                  bool IVSigned) {
3837   if (!CGF.HaveInsertPoint())
3838     return;
3839   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
3840   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3841   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
3842 }
3843 
3844 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
3845                                           SourceLocation Loc, unsigned IVSize,
3846                                           bool IVSigned, Address IL,
3847                                           Address LB, Address UB,
3848                                           Address ST) {
3849   // Call __kmpc_dispatch_next(
3850   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
3851   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
3852   //          kmp_int[32|64] *p_stride);
3853   llvm::Value *Args[] = {
3854       emitUpdateLocation(CGF, Loc),
3855       getThreadID(CGF, Loc),
3856       IL.getPointer(), // &isLastIter
3857       LB.getPointer(), // &Lower
3858       UB.getPointer(), // &Upper
3859       ST.getPointer()  // &Stride
3860   };
3861   llvm::Value *Call =
3862       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
3863   return CGF.EmitScalarConversion(
3864       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
3865       CGF.getContext().BoolTy, Loc);
3866 }
3867 
3868 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
3869                                            llvm::Value *NumThreads,
3870                                            SourceLocation Loc) {
3871   if (!CGF.HaveInsertPoint())
3872     return;
3873   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
3874   llvm::Value *Args[] = {
3875       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3876       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
3877   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
3878                       Args);
3879 }
3880 
3881 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
3882                                          ProcBindKind ProcBind,
3883                                          SourceLocation Loc) {
3884   if (!CGF.HaveInsertPoint())
3885     return;
3886   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
3887   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
3888   llvm::Value *Args[] = {
3889       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3890       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
3891   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
3892 }
3893 
3894 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
3895                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
3896   llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
3897   if (OMPBuilder) {
3898     OMPBuilder->CreateFlush(CGF.Builder);
3899   } else {
3900     if (!CGF.HaveInsertPoint())
3901       return;
3902     // Build call void __kmpc_flush(ident_t *loc)
3903     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
3904                         emitUpdateLocation(CGF, Loc));
3905   }
3906 }
3907 
namespace {
/// Field indexes of the type kmp_task_t. The enumerator order defines the
/// index values, so it must not be changed.
enum KmpTaskTFields {
  /// Shared variables list.
  KmpTaskTShareds = 0,
  /// Routine executed by the task.
  KmpTaskTRoutine,
  /// Part id, used for untied tasks.
  KmpTaskTPartId,
  /// Function that calls the destructors of the private variables.
  Data1,
  /// Priority of the task.
  Data2,
  /// Lower bound (taskloops only).
  KmpTaskTLowerBound,
  /// Upper bound (taskloops only).
  KmpTaskTUpperBound,
  /// Stride (taskloops only).
  KmpTaskTStride,
  /// Last-iteration flag (taskloops only).
  KmpTaskTLastIter,
  /// Reduction data (taskloops only).
  KmpTaskTReductions,
};
} // anonymous namespace
3933 
3934 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3935   return OffloadEntriesTargetRegion.empty() &&
3936          OffloadEntriesDeviceGlobalVar.empty();
3937 }
3938 
3939 /// Initialize target region entry.
3940 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3941     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3942                                     StringRef ParentName, unsigned LineNum,
3943                                     unsigned Order) {
3944   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3945                                              "only required for the device "
3946                                              "code generation.");
3947   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3948       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3949                                    OMPTargetRegionEntryTargetRegion);
3950   ++OffloadingEntriesNum;
3951 }
3952 
3953 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3954     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3955                                   StringRef ParentName, unsigned LineNum,
3956                                   llvm::Constant *Addr, llvm::Constant *ID,
3957                                   OMPTargetRegionEntryKind Flags) {
3958   // If we are emitting code for a target, the entry is already initialized,
3959   // only has to be registered.
3960   if (CGM.getLangOpts().OpenMPIsDevice) {
3961     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
3962       unsigned DiagID = CGM.getDiags().getCustomDiagID(
3963           DiagnosticsEngine::Error,
3964           "Unable to find target region on line '%0' in the device code.");
3965       CGM.getDiags().Report(DiagID) << LineNum;
3966       return;
3967     }
3968     auto &Entry =
3969         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3970     assert(Entry.isValid() && "Entry not initialized!");
3971     Entry.setAddress(Addr);
3972     Entry.setID(ID);
3973     Entry.setFlags(Flags);
3974   } else {
3975     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3976     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3977     ++OffloadingEntriesNum;
3978   }
3979 }
3980 
3981 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3982     unsigned DeviceID, unsigned FileID, StringRef ParentName,
3983     unsigned LineNum) const {
3984   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3985   if (PerDevice == OffloadEntriesTargetRegion.end())
3986     return false;
3987   auto PerFile = PerDevice->second.find(FileID);
3988   if (PerFile == PerDevice->second.end())
3989     return false;
3990   auto PerParentName = PerFile->second.find(ParentName);
3991   if (PerParentName == PerFile->second.end())
3992     return false;
3993   auto PerLine = PerParentName->second.find(LineNum);
3994   if (PerLine == PerParentName->second.end())
3995     return false;
3996   // Fail if this entry is already registered.
3997   if (PerLine->second.getAddress() || PerLine->second.getID())
3998     return false;
3999   return true;
4000 }
4001 
4002 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
4003     const OffloadTargetRegionEntryInfoActTy &Action) {
4004   // Scan all target region entries and perform the provided action.
4005   for (const auto &D : OffloadEntriesTargetRegion)
4006     for (const auto &F : D.second)
4007       for (const auto &P : F.second)
4008         for (const auto &L : P.second)
4009           Action(D.first, F.first, P.first(), L.first, L.second);
4010 }
4011 
/// Create a placeholder entry for the device global variable \p Name with the
/// given \p Flags and \p Order. Only valid during device code generation
/// (asserted below).
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  // try_emplace leaves an already existing entry for Name untouched; the
  // entry counter is incremented regardless.
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}
4022 
4023 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
4024     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
4025                                      CharUnits VarSize,
4026                                      OMPTargetGlobalVarEntryKind Flags,
4027                                      llvm::GlobalValue::LinkageTypes Linkage) {
4028   if (CGM.getLangOpts().OpenMPIsDevice) {
4029     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
4030     assert(Entry.isValid() && Entry.getFlags() == Flags &&
4031            "Entry not initialized!");
4032     assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
4033            "Resetting with the new address.");
4034     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
4035       if (Entry.getVarSize().isZero()) {
4036         Entry.setVarSize(VarSize);
4037         Entry.setLinkage(Linkage);
4038       }
4039       return;
4040     }
4041     Entry.setVarSize(VarSize);
4042     Entry.setLinkage(Linkage);
4043     Entry.setAddress(Addr);
4044   } else {
4045     if (hasDeviceGlobalVarEntryInfo(VarName)) {
4046       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
4047       assert(Entry.isValid() && Entry.getFlags() == Flags &&
4048              "Entry not initialized!");
4049       assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
4050              "Resetting with the new address.");
4051       if (Entry.getVarSize().isZero()) {
4052         Entry.setVarSize(VarSize);
4053         Entry.setLinkage(Linkage);
4054       }
4055       return;
4056     }
4057     OffloadEntriesDeviceGlobalVar.try_emplace(
4058         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
4059     ++OffloadingEntriesNum;
4060   }
4061 }
4062 
4063 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
4064     actOnDeviceGlobalVarEntriesInfo(
4065         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
4066   // Scan all target region entries and perform the provided action.
4067   for (const auto &E : OffloadEntriesDeviceGlobalVar)
4068     Action(E.getKey(), E.getValue());
4069 }
4070 
4071 void CGOpenMPRuntime::createOffloadEntry(
4072     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
4073     llvm::GlobalValue::LinkageTypes Linkage) {
4074   StringRef Name = Addr->getName();
4075   llvm::Module &M = CGM.getModule();
4076   llvm::LLVMContext &C = M.getContext();
4077 
4078   // Create constant string with the name.
4079   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
4080 
4081   std::string StringName = getName({"omp_offloading", "entry_name"});
4082   auto *Str = new llvm::GlobalVariable(
4083       M, StrPtrInit->getType(), /*isConstant=*/true,
4084       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
4085   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
4086 
4087   llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
4088                             llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
4089                             llvm::ConstantInt::get(CGM.SizeTy, Size),
4090                             llvm::ConstantInt::get(CGM.Int32Ty, Flags),
4091                             llvm::ConstantInt::get(CGM.Int32Ty, 0)};
4092   std::string EntryName = getName({"omp_offloading", "entry", ""});
4093   llvm::GlobalVariable *Entry = createGlobalStruct(
4094       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
4095       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
4096 
4097   // The entry has to be created in the section the linker expects it to be.
4098   Entry->setSection("omp_offloading_entries");
4099 }
4100 
/// Emit the "omp_offload.info" named metadata and the offload entry globals
/// for every entry known to the offload entries manager.
///
/// Metadata operands are added in the iteration order of the manager's
/// tables, while the offload entries themselves are created in the order the
/// entries were originally registered (their "order" index).
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Metadata is generated for target regions (kind 0) and for device global
  // variables (kind 1); see the two emitter lambdas below.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries indexed by their order number, together with a source location
  // (used for diagnostics) and a name (parent function or variable).
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  // Parent function name of each target region entry, indexed by order.
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Find the file with the matching device/file ID to build a source
        // location for diagnostics. Note that within this loop the name 'E'
        // refers to the end iterator and shadows the entry parameter.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Now create the actual offload entries, in registration order.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        // No entry is emitted on the device when unified shared memory is
        // required.
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        // A 'link' entry is expected to have an address on the host only.
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      // For global variables the address doubles as the entry ID.
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
4274 
4275 /// Loads all the offload entries information from the host IR
4276 /// metadata.
4277 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
4278   // If we are in target mode, load the metadata from the host IR. This code has
4279   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
4280 
4281   if (!CGM.getLangOpts().OpenMPIsDevice)
4282     return;
4283 
4284   if (CGM.getLangOpts().OMPHostIRFile.empty())
4285     return;
4286 
4287   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
4288   if (auto EC = Buf.getError()) {
4289     CGM.getDiags().Report(diag::err_cannot_open_file)
4290         << CGM.getLangOpts().OMPHostIRFile << EC.message();
4291     return;
4292   }
4293 
4294   llvm::LLVMContext C;
4295   auto ME = expectedToErrorOrAndEmitErrors(
4296       C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
4297 
4298   if (auto EC = ME.getError()) {
4299     unsigned DiagID = CGM.getDiags().getCustomDiagID(
4300         DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
4301     CGM.getDiags().Report(DiagID)
4302         << CGM.getLangOpts().OMPHostIRFile << EC.message();
4303     return;
4304   }
4305 
4306   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
4307   if (!MD)
4308     return;
4309 
4310   for (llvm::MDNode *MN : MD->operands()) {
4311     auto &&GetMDInt = [MN](unsigned Idx) {
4312       auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
4313       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
4314     };
4315 
4316     auto &&GetMDString = [MN](unsigned Idx) {
4317       auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
4318       return V->getString();
4319     };
4320 
4321     switch (GetMDInt(0)) {
4322     default:
4323       llvm_unreachable("Unexpected metadata!");
4324       break;
4325     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
4326         OffloadingEntryInfoTargetRegion:
4327       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
4328           /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
4329           /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
4330           /*Order=*/GetMDInt(5));
4331       break;
4332     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
4333         OffloadingEntryInfoDeviceGlobalVar:
4334       OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
4335           /*MangledName=*/GetMDString(1),
4336           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
4337               /*Flags=*/GetMDInt(2)),
4338           /*Order=*/GetMDInt(3));
4339       break;
4340     }
4341   }
4342 }
4343 
4344 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
4345   if (!KmpRoutineEntryPtrTy) {
4346     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
4347     ASTContext &C = CGM.getContext();
4348     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
4349     FunctionProtoType::ExtProtoInfo EPI;
4350     KmpRoutineEntryPtrQTy = C.getPointerType(
4351         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
4352     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
4353   }
4354 }
4355 
4356 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
4357   // Make sure the type of the entry is already created. This is the type we
4358   // have to create:
4359   // struct __tgt_offload_entry{
4360   //   void      *addr;       // Pointer to the offload entry info.
4361   //                          // (function or global)
4362   //   char      *name;       // Name of the function or global.
4363   //   size_t     size;       // Size of the entry info (0 if it a function).
4364   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
4365   //   int32_t    reserved;   // Reserved, to use by the runtime library.
4366   // };
4367   if (TgtOffloadEntryQTy.isNull()) {
4368     ASTContext &C = CGM.getContext();
4369     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
4370     RD->startDefinition();
4371     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4372     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
4373     addFieldToRecordDecl(C, RD, C.getSizeType());
4374     addFieldToRecordDecl(
4375         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4376     addFieldToRecordDecl(
4377         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4378     RD->completeDefinition();
4379     RD->addAttr(PackedAttr::CreateImplicit(C));
4380     TgtOffloadEntryQTy = C.getRecordType(RD);
4381   }
4382   return TgtOffloadEntryQTy;
4383 }
4384 
4385 namespace {
4386 struct PrivateHelpersTy {
4387   PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
4388                    const VarDecl *PrivateElemInit)
4389       : Original(Original), PrivateCopy(PrivateCopy),
4390         PrivateElemInit(PrivateElemInit) {}
4391   const VarDecl *Original;
4392   const VarDecl *PrivateCopy;
4393   const VarDecl *PrivateElemInit;
4394 };
4395 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
4396 } // anonymous namespace
4397 
4398 static RecordDecl *
4399 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
4400   if (!Privates.empty()) {
4401     ASTContext &C = CGM.getContext();
4402     // Build struct .kmp_privates_t. {
4403     //         /*  private vars  */
4404     //       };
4405     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
4406     RD->startDefinition();
4407     for (const auto &Pair : Privates) {
4408       const VarDecl *VD = Pair.second.Original;
4409       QualType Type = VD->getType().getNonReferenceType();
4410       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
4411       if (VD->hasAttrs()) {
4412         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
4413              E(VD->getAttrs().end());
4414              I != E; ++I)
4415           FD->addAttr(*I);
4416       }
4417     }
4418     RD->completeDefinition();
4419     return RD;
4420   }
4421   return nullptr;
4422 }
4423 
4424 static RecordDecl *
4425 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
4426                          QualType KmpInt32Ty,
4427                          QualType KmpRoutineEntryPointerQTy) {
4428   ASTContext &C = CGM.getContext();
4429   // Build struct kmp_task_t {
4430   //         void *              shareds;
4431   //         kmp_routine_entry_t routine;
4432   //         kmp_int32           part_id;
4433   //         kmp_cmplrdata_t data1;
4434   //         kmp_cmplrdata_t data2;
4435   // For taskloops additional fields:
4436   //         kmp_uint64          lb;
4437   //         kmp_uint64          ub;
4438   //         kmp_int64           st;
4439   //         kmp_int32           liter;
4440   //         void *              reductions;
4441   //       };
4442   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
4443   UD->startDefinition();
4444   addFieldToRecordDecl(C, UD, KmpInt32Ty);
4445   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
4446   UD->completeDefinition();
4447   QualType KmpCmplrdataTy = C.getRecordType(UD);
4448   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
4449   RD->startDefinition();
4450   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4451   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
4452   addFieldToRecordDecl(C, RD, KmpInt32Ty);
4453   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4454   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4455   if (isOpenMPTaskLoopDirective(Kind)) {
4456     QualType KmpUInt64Ty =
4457         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
4458     QualType KmpInt64Ty =
4459         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
4460     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4461     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4462     addFieldToRecordDecl(C, RD, KmpInt64Ty);
4463     addFieldToRecordDecl(C, RD, KmpInt32Ty);
4464     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4465   }
4466   RD->completeDefinition();
4467   return RD;
4468 }
4469 
4470 static RecordDecl *
4471 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
4472                                      ArrayRef<PrivateDataTy> Privates) {
4473   ASTContext &C = CGM.getContext();
4474   // Build struct kmp_task_t_with_privates {
4475   //         kmp_task_t task_data;
4476   //         .kmp_privates_t. privates;
4477   //       };
4478   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
4479   RD->startDefinition();
4480   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
4481   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
4482     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
4483   RD->completeDefinition();
4484   return RD;
4485 }
4486 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, &tt->task_data.part_id, &tt->privates,
///   task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->task_data.reductions,
///   tt->task_data.shareds);
///   return 0;
/// }
/// \endcode
/// The privates argument is a null pointer when the task record has no
/// privates field.
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Declare the two implicit parameters (gtid, task pointer) and create the
  // internal-linkage proxy function.
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, &tt->task_data.part_id, &tt->privates,
  // task_privates_map, tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.reductions,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // TDBase is the whole kmp_task_t_with_privates object; Base is its first
  // field, i.e. the embedded kmp_task_t.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address, not by value.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  // The shareds pointer is loaded and cast to the expected shareds type.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates field is the second field of the wrapper record, if present;
  // otherwise a null pointer is passed.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  // Arguments passed for every kind of task; the last one is the task object
  // itself as a void pointer.
  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloops additionally pass lower bound, upper bound, stride, last
    // iteration flag and reductions, all loaded from the kmp_task_t fields.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The proxy always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
4601 
4602 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
4603                                             SourceLocation Loc,
4604                                             QualType KmpInt32Ty,
4605                                             QualType KmpTaskTWithPrivatesPtrQTy,
4606                                             QualType KmpTaskTWithPrivatesQTy) {
4607   ASTContext &C = CGM.getContext();
4608   FunctionArgList Args;
4609   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
4610                             ImplicitParamDecl::Other);
4611   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4612                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
4613                                 ImplicitParamDecl::Other);
4614   Args.push_back(&GtidArg);
4615   Args.push_back(&TaskTypeArg);
4616   const auto &DestructorFnInfo =
4617       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
4618   llvm::FunctionType *DestructorFnTy =
4619       CGM.getTypes().GetFunctionType(DestructorFnInfo);
4620   std::string Name =
4621       CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
4622   auto *DestructorFn =
4623       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
4624                              Name, &CGM.getModule());
4625   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
4626                                     DestructorFnInfo);
4627   DestructorFn->setDoesNotRecurse();
4628   CodeGenFunction CGF(CGM);
4629   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
4630                     Args, Loc, Loc);
4631 
4632   LValue Base = CGF.EmitLoadOfPointerLValue(
4633       CGF.GetAddrOfLocalVar(&TaskTypeArg),
4634       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4635   const auto *KmpTaskTWithPrivatesQTyRD =
4636       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
4637   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4638   Base = CGF.EmitLValueForField(Base, *FI);
4639   for (const auto *Field :
4640        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
4641     if (QualType::DestructionKind DtorKind =
4642             Field->getType().isDestructedType()) {
4643       LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
4644       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
4645     }
4646   }
4647   CGF.FinishFunction();
4648   return DestructorFn;
4649 }
4650 
4651 /// Emit a privates mapping function for correct handling of private and
4652 /// firstprivate variables.
4653 /// \code
4654 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
4655 /// **noalias priv1,...,  <tyn> **noalias privn) {
4656 ///   *priv1 = &.privates.priv1;
4657 ///   ...;
4658 ///   *privn = &.privates.privn;
4659 /// }
4660 /// \endcode
4661 static llvm::Value *
4662 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
4663                                ArrayRef<const Expr *> PrivateVars,
4664                                ArrayRef<const Expr *> FirstprivateVars,
4665                                ArrayRef<const Expr *> LastprivateVars,
4666                                QualType PrivatesQTy,
4667                                ArrayRef<PrivateDataTy> Privates) {
4668   ASTContext &C = CGM.getContext();
4669   FunctionArgList Args;
4670   ImplicitParamDecl TaskPrivatesArg(
4671       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4672       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
4673       ImplicitParamDecl::Other);
4674   Args.push_back(&TaskPrivatesArg);
4675   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
4676   unsigned Counter = 1;
4677   for (const Expr *E : PrivateVars) {
4678     Args.push_back(ImplicitParamDecl::Create(
4679         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4680         C.getPointerType(C.getPointerType(E->getType()))
4681             .withConst()
4682             .withRestrict(),
4683         ImplicitParamDecl::Other));
4684     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4685     PrivateVarsPos[VD] = Counter;
4686     ++Counter;
4687   }
4688   for (const Expr *E : FirstprivateVars) {
4689     Args.push_back(ImplicitParamDecl::Create(
4690         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4691         C.getPointerType(C.getPointerType(E->getType()))
4692             .withConst()
4693             .withRestrict(),
4694         ImplicitParamDecl::Other));
4695     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4696     PrivateVarsPos[VD] = Counter;
4697     ++Counter;
4698   }
4699   for (const Expr *E : LastprivateVars) {
4700     Args.push_back(ImplicitParamDecl::Create(
4701         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4702         C.getPointerType(C.getPointerType(E->getType()))
4703             .withConst()
4704             .withRestrict(),
4705         ImplicitParamDecl::Other));
4706     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4707     PrivateVarsPos[VD] = Counter;
4708     ++Counter;
4709   }
4710   const auto &TaskPrivatesMapFnInfo =
4711       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4712   llvm::FunctionType *TaskPrivatesMapTy =
4713       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
4714   std::string Name =
4715       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
4716   auto *TaskPrivatesMap = llvm::Function::Create(
4717       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
4718       &CGM.getModule());
4719   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
4720                                     TaskPrivatesMapFnInfo);
4721   if (CGM.getLangOpts().Optimize) {
4722     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
4723     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
4724     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
4725   }
4726   CodeGenFunction CGF(CGM);
4727   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
4728                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
4729 
4730   // *privi = &.privates.privi;
4731   LValue Base = CGF.EmitLoadOfPointerLValue(
4732       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
4733       TaskPrivatesArg.getType()->castAs<PointerType>());
4734   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
4735   Counter = 0;
4736   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
4737     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
4738     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
4739     LValue RefLVal =
4740         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
4741     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
4742         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
4743     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
4744     ++Counter;
4745   }
4746   CGF.FinishFunction();
4747   return TaskPrivatesMap;
4748 }
4749 
4750 /// Emit initialization for private variables in task-based directives.
4751 static void emitPrivatesInit(CodeGenFunction &CGF,
4752                              const OMPExecutableDirective &D,
4753                              Address KmpTaskSharedsPtr, LValue TDBase,
4754                              const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4755                              QualType SharedsTy, QualType SharedsPtrTy,
4756                              const OMPTaskDataTy &Data,
4757                              ArrayRef<PrivateDataTy> Privates, bool ForDup) {
4758   ASTContext &C = CGF.getContext();
4759   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4760   LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
4761   OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
4762                                  ? OMPD_taskloop
4763                                  : OMPD_task;
4764   const CapturedStmt &CS = *D.getCapturedStmt(Kind);
4765   CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
4766   LValue SrcBase;
4767   bool IsTargetTask =
4768       isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
4769       isOpenMPTargetExecutionDirective(D.getDirectiveKind());
4770   // For target-based directives skip 3 firstprivate arrays BasePointersArray,
4771   // PointersArray and SizesArray. The original variables for these arrays are
4772   // not captured and we get their addresses explicitly.
4773   if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
4774       (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
4775     SrcBase = CGF.MakeAddrLValue(
4776         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4777             KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
4778         SharedsTy);
4779   }
4780   FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
4781   for (const PrivateDataTy &Pair : Privates) {
4782     const VarDecl *VD = Pair.second.PrivateCopy;
4783     const Expr *Init = VD->getAnyInitializer();
4784     if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
4785                              !CGF.isTrivialInitializer(Init)))) {
4786       LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
4787       if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
4788         const VarDecl *OriginalVD = Pair.second.Original;
4789         // Check if the variable is the target-based BasePointersArray,
4790         // PointersArray or SizesArray.
4791         LValue SharedRefLValue;
4792         QualType Type = PrivateLValue.getType();
4793         const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
4794         if (IsTargetTask && !SharedField) {
4795           assert(isa<ImplicitParamDecl>(OriginalVD) &&
4796                  isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
4797                  cast<CapturedDecl>(OriginalVD->getDeclContext())
4798                          ->getNumParams() == 0 &&
4799                  isa<TranslationUnitDecl>(
4800                      cast<CapturedDecl>(OriginalVD->getDeclContext())
4801                          ->getDeclContext()) &&
4802                  "Expected artificial target data variable.");
4803           SharedRefLValue =
4804               CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
4805         } else {
4806           SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
4807           SharedRefLValue = CGF.MakeAddrLValue(
4808               Address(SharedRefLValue.getPointer(CGF),
4809                       C.getDeclAlign(OriginalVD)),
4810               SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
4811               SharedRefLValue.getTBAAInfo());
4812         }
4813         if (Type->isArrayType()) {
4814           // Initialize firstprivate array.
4815           if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
4816             // Perform simple memcpy.
4817             CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
4818           } else {
4819             // Initialize firstprivate array using element-by-element
4820             // initialization.
4821             CGF.EmitOMPAggregateAssign(
4822                 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
4823                 Type,
4824                 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
4825                                                   Address SrcElement) {
4826                   // Clean up any temporaries needed by the initialization.
4827                   CodeGenFunction::OMPPrivateScope InitScope(CGF);
4828                   InitScope.addPrivate(
4829                       Elem, [SrcElement]() -> Address { return SrcElement; });
4830                   (void)InitScope.Privatize();
4831                   // Emit initialization for single element.
4832                   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
4833                       CGF, &CapturesInfo);
4834                   CGF.EmitAnyExprToMem(Init, DestElement,
4835                                        Init->getType().getQualifiers(),
4836                                        /*IsInitializer=*/false);
4837                 });
4838           }
4839         } else {
4840           CodeGenFunction::OMPPrivateScope InitScope(CGF);
4841           InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
4842             return SharedRefLValue.getAddress(CGF);
4843           });
4844           (void)InitScope.Privatize();
4845           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
4846           CGF.EmitExprAsInit(Init, VD, PrivateLValue,
4847                              /*capturedByInit=*/false);
4848         }
4849       } else {
4850         CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
4851       }
4852     }
4853     ++FI;
4854   }
4855 }
4856 
4857 /// Check if duplication function is required for taskloops.
4858 static bool checkInitIsRequired(CodeGenFunction &CGF,
4859                                 ArrayRef<PrivateDataTy> Privates) {
4860   bool InitRequired = false;
4861   for (const PrivateDataTy &Pair : Privates) {
4862     const VarDecl *VD = Pair.second.PrivateCopy;
4863     const Expr *Init = VD->getAnyInitializer();
4864     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
4865                                     !CGF.isTrivialInitializer(Init));
4866     if (InitRequired)
4867       break;
4868   }
4869   return InitRequired;
4870 }
4871 
4872 
4873 /// Emit task_dup function (for initialization of
4874 /// private/firstprivate/lastprivate vars and last_iter flag)
4875 /// \code
4876 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
4877 /// lastpriv) {
4878 /// // setup lastprivate flag
4879 ///    task_dst->last = lastpriv;
4880 /// // could be constructor calls here...
4881 /// }
4882 /// \endcode
4883 static llvm::Value *
4884 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
4885                     const OMPExecutableDirective &D,
4886                     QualType KmpTaskTWithPrivatesPtrQTy,
4887                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4888                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
4889                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
4890                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
4891   ASTContext &C = CGM.getContext();
4892   FunctionArgList Args;
4893   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4894                            KmpTaskTWithPrivatesPtrQTy,
4895                            ImplicitParamDecl::Other);
4896   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4897                            KmpTaskTWithPrivatesPtrQTy,
4898                            ImplicitParamDecl::Other);
4899   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
4900                                 ImplicitParamDecl::Other);
4901   Args.push_back(&DstArg);
4902   Args.push_back(&SrcArg);
4903   Args.push_back(&LastprivArg);
4904   const auto &TaskDupFnInfo =
4905       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4906   llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
4907   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
4908   auto *TaskDup = llvm::Function::Create(
4909       TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4910   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
4911   TaskDup->setDoesNotRecurse();
4912   CodeGenFunction CGF(CGM);
4913   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
4914                     Loc);
4915 
4916   LValue TDBase = CGF.EmitLoadOfPointerLValue(
4917       CGF.GetAddrOfLocalVar(&DstArg),
4918       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4919   // task_dst->liter = lastpriv;
4920   if (WithLastIter) {
4921     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4922     LValue Base = CGF.EmitLValueForField(
4923         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4924     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4925     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
4926         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
4927     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
4928   }
4929 
4930   // Emit initial values for private copies (if any).
4931   assert(!Privates.empty());
4932   Address KmpTaskSharedsPtr = Address::invalid();
4933   if (!Data.FirstprivateVars.empty()) {
4934     LValue TDBase = CGF.EmitLoadOfPointerLValue(
4935         CGF.GetAddrOfLocalVar(&SrcArg),
4936         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4937     LValue Base = CGF.EmitLValueForField(
4938         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4939     KmpTaskSharedsPtr = Address(
4940         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
4941                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
4942                                                   KmpTaskTShareds)),
4943                              Loc),
4944         CGF.getNaturalTypeAlignment(SharedsTy));
4945   }
4946   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
4947                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
4948   CGF.FinishFunction();
4949   return TaskDup;
4950 }
4951 
4952 /// Checks if destructor function is required to be generated.
4953 /// \return true if cleanups are required, false otherwise.
4954 static bool
4955 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
4956   bool NeedsCleanup = false;
4957   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4958   const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
4959   for (const FieldDecl *FD : PrivateRD->fields()) {
4960     NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
4961     if (NeedsCleanup)
4962       break;
4963   }
4964   return NeedsCleanup;
4965 }
4966 
4967 CGOpenMPRuntime::TaskResultTy
4968 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
4969                               const OMPExecutableDirective &D,
4970                               llvm::Function *TaskFunction, QualType SharedsTy,
4971                               Address Shareds, const OMPTaskDataTy &Data) {
4972   ASTContext &C = CGM.getContext();
4973   llvm::SmallVector<PrivateDataTy, 4> Privates;
4974   // Aggregate privates and sort them by the alignment.
4975   auto I = Data.PrivateCopies.begin();
4976   for (const Expr *E : Data.PrivateVars) {
4977     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4978     Privates.emplace_back(
4979         C.getDeclAlign(VD),
4980         PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4981                          /*PrivateElemInit=*/nullptr));
4982     ++I;
4983   }
4984   I = Data.FirstprivateCopies.begin();
4985   auto IElemInitRef = Data.FirstprivateInits.begin();
4986   for (const Expr *E : Data.FirstprivateVars) {
4987     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4988     Privates.emplace_back(
4989         C.getDeclAlign(VD),
4990         PrivateHelpersTy(
4991             VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4992             cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
4993     ++I;
4994     ++IElemInitRef;
4995   }
4996   I = Data.LastprivateCopies.begin();
4997   for (const Expr *E : Data.LastprivateVars) {
4998     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4999     Privates.emplace_back(
5000         C.getDeclAlign(VD),
5001         PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
5002                          /*PrivateElemInit=*/nullptr));
5003     ++I;
5004   }
5005   llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
5006     return L.first > R.first;
5007   });
5008   QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
5009   // Build type kmp_routine_entry_t (if not built yet).
5010   emitKmpRoutineEntryT(KmpInt32Ty);
5011   // Build type kmp_task_t (if not built yet).
5012   if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
5013     if (SavedKmpTaskloopTQTy.isNull()) {
5014       SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
5015           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
5016     }
5017     KmpTaskTQTy = SavedKmpTaskloopTQTy;
5018   } else {
5019     assert((D.getDirectiveKind() == OMPD_task ||
5020             isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
5021             isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
5022            "Expected taskloop, task or target directive");
5023     if (SavedKmpTaskTQTy.isNull()) {
5024       SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
5025           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
5026     }
5027     KmpTaskTQTy = SavedKmpTaskTQTy;
5028   }
5029   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
5030   // Build particular struct kmp_task_t for the given task.
5031   const RecordDecl *KmpTaskTWithPrivatesQTyRD =
5032       createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
5033   QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
5034   QualType KmpTaskTWithPrivatesPtrQTy =
5035       C.getPointerType(KmpTaskTWithPrivatesQTy);
5036   llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
5037   llvm::Type *KmpTaskTWithPrivatesPtrTy =
5038       KmpTaskTWithPrivatesTy->getPointerTo();
5039   llvm::Value *KmpTaskTWithPrivatesTySize =
5040       CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
5041   QualType SharedsPtrTy = C.getPointerType(SharedsTy);
5042 
5043   // Emit initial values for private copies (if any).
5044   llvm::Value *TaskPrivatesMap = nullptr;
5045   llvm::Type *TaskPrivatesMapTy =
5046       std::next(TaskFunction->arg_begin(), 3)->getType();
5047   if (!Privates.empty()) {
5048     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
5049     TaskPrivatesMap = emitTaskPrivateMappingFunction(
5050         CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
5051         FI->getType(), Privates);
5052     TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5053         TaskPrivatesMap, TaskPrivatesMapTy);
5054   } else {
5055     TaskPrivatesMap = llvm::ConstantPointerNull::get(
5056         cast<llvm::PointerType>(TaskPrivatesMapTy));
5057   }
5058   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
5059   // kmp_task_t *tt);
5060   llvm::Function *TaskEntry = emitProxyTaskFunction(
5061       CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
5062       KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
5063       TaskPrivatesMap);
5064 
5065   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
5066   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
5067   // kmp_routine_entry_t *task_entry);
5068   // Task flags. Format is taken from
5069   // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
5070   // description of kmp_tasking_flags struct.
5071   enum {
5072     TiedFlag = 0x1,
5073     FinalFlag = 0x2,
5074     DestructorsFlag = 0x8,
5075     PriorityFlag = 0x20
5076   };
5077   unsigned Flags = Data.Tied ? TiedFlag : 0;
5078   bool NeedsCleanup = false;
5079   if (!Privates.empty()) {
5080     NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
5081     if (NeedsCleanup)
5082       Flags = Flags | DestructorsFlag;
5083   }
5084   if (Data.Priority.getInt())
5085     Flags = Flags | PriorityFlag;
5086   llvm::Value *TaskFlags =
5087       Data.Final.getPointer()
5088           ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
5089                                      CGF.Builder.getInt32(FinalFlag),
5090                                      CGF.Builder.getInt32(/*C=*/0))
5091           : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
5092   TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
5093   llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
5094   SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
5095       getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
5096       SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5097           TaskEntry, KmpRoutineEntryPtrTy)};
5098   llvm::Value *NewTask;
5099   if (D.hasClausesOfKind<OMPNowaitClause>()) {
5100     // Check if we have any device clause associated with the directive.
5101     const Expr *Device = nullptr;
5102     if (auto *C = D.getSingleClause<OMPDeviceClause>())
5103       Device = C->getDevice();
5104     // Emit device ID if any otherwise use default value.
5105     llvm::Value *DeviceID;
5106     if (Device)
5107       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
5108                                            CGF.Int64Ty, /*isSigned=*/true);
5109     else
5110       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
5111     AllocArgs.push_back(DeviceID);
5112     NewTask = CGF.EmitRuntimeCall(
5113       createRuntimeFunction(OMPRTL__kmpc_omp_target_task_alloc), AllocArgs);
5114   } else {
5115     NewTask = CGF.EmitRuntimeCall(
5116       createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
5117   }
5118   llvm::Value *NewTaskNewTaskTTy =
5119       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5120           NewTask, KmpTaskTWithPrivatesPtrTy);
5121   LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
5122                                                KmpTaskTWithPrivatesQTy);
5123   LValue TDBase =
5124       CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
5125   // Fill the data in the resulting kmp_task_t record.
5126   // Copy shareds if there are any.
5127   Address KmpTaskSharedsPtr = Address::invalid();
5128   if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
5129     KmpTaskSharedsPtr =
5130         Address(CGF.EmitLoadOfScalar(
5131                     CGF.EmitLValueForField(
5132                         TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
5133                                            KmpTaskTShareds)),
5134                     Loc),
5135                 CGF.getNaturalTypeAlignment(SharedsTy));
5136     LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
5137     LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
5138     CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
5139   }
5140   // Emit initial values for private copies (if any).
5141   TaskResultTy Result;
5142   if (!Privates.empty()) {
5143     emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
5144                      SharedsTy, SharedsPtrTy, Data, Privates,
5145                      /*ForDup=*/false);
5146     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
5147         (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
5148       Result.TaskDupFn = emitTaskDupFunction(
5149           CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
5150           KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
5151           /*WithLastIter=*/!Data.LastprivateVars.empty());
5152     }
5153   }
5154   // Fields of union "kmp_cmplrdata_t" for destructors and priority.
5155   enum { Priority = 0, Destructors = 1 };
5156   // Provide pointer to function with destructors for privates.
5157   auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
5158   const RecordDecl *KmpCmplrdataUD =
5159       (*FI)->getType()->getAsUnionType()->getDecl();
5160   if (NeedsCleanup) {
5161     llvm::Value *DestructorFn = emitDestructorsFunction(
5162         CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
5163         KmpTaskTWithPrivatesQTy);
5164     LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
5165     LValue DestructorsLV = CGF.EmitLValueForField(
5166         Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
5167     CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5168                               DestructorFn, KmpRoutineEntryPtrTy),
5169                           DestructorsLV);
5170   }
5171   // Set priority.
5172   if (Data.Priority.getInt()) {
5173     LValue Data2LV = CGF.EmitLValueForField(
5174         TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
5175     LValue PriorityLV = CGF.EmitLValueForField(
5176         Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
5177     CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
5178   }
5179   Result.NewTask = NewTask;
5180   Result.TaskEntry = TaskEntry;
5181   Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
5182   Result.TDBase = TDBase;
5183   Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
5184   return Result;
5185 }
5186 
5187 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
5188                                    const OMPExecutableDirective &D,
5189                                    llvm::Function *TaskFunction,
5190                                    QualType SharedsTy, Address Shareds,
5191                                    const Expr *IfCond,
5192                                    const OMPTaskDataTy &Data) {
5193   if (!CGF.HaveInsertPoint())
5194     return;
5195 
5196   TaskResultTy Result =
5197       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5198   llvm::Value *NewTask = Result.NewTask;
5199   llvm::Function *TaskEntry = Result.TaskEntry;
5200   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
5201   LValue TDBase = Result.TDBase;
5202   const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
5203   ASTContext &C = CGM.getContext();
5204   // Process list of dependences.
5205   Address DependenciesArray = Address::invalid();
5206   unsigned NumDependencies = Data.Dependences.size();
5207   if (NumDependencies) {
5208     // Dependence kind for RTL.
5209     enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3, DepMutexInOutSet = 0x4 };
5210     enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
5211     RecordDecl *KmpDependInfoRD;
5212     QualType FlagsTy =
5213         C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
5214     llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5215     if (KmpDependInfoTy.isNull()) {
5216       KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
5217       KmpDependInfoRD->startDefinition();
5218       addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
5219       addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
5220       addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
5221       KmpDependInfoRD->completeDefinition();
5222       KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
5223     } else {
5224       KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5225     }
5226     // Define type kmp_depend_info[<Dependences.size()>];
5227     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
5228         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
5229         nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
5230     // kmp_depend_info[<Dependences.size()>] deps;
5231     DependenciesArray =
5232         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
5233     for (unsigned I = 0; I < NumDependencies; ++I) {
5234       const Expr *E = Data.Dependences[I].second;
5235       LValue Addr = CGF.EmitLValue(E);
5236       llvm::Value *Size;
5237       QualType Ty = E->getType();
5238       if (const auto *ASE =
5239               dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
5240         LValue UpAddrLVal =
5241             CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
5242         llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
5243             UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
5244         llvm::Value *LowIntPtr =
5245             CGF.Builder.CreatePtrToInt(Addr.getPointer(CGF), CGM.SizeTy);
5246         llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
5247         Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
5248       } else {
5249         Size = CGF.getTypeSize(Ty);
5250       }
5251       LValue Base = CGF.MakeAddrLValue(
5252           CGF.Builder.CreateConstArrayGEP(DependenciesArray, I),
5253           KmpDependInfoTy);
5254       // deps[i].base_addr = &<Dependences[i].second>;
5255       LValue BaseAddrLVal = CGF.EmitLValueForField(
5256           Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
5257       CGF.EmitStoreOfScalar(
5258           CGF.Builder.CreatePtrToInt(Addr.getPointer(CGF), CGF.IntPtrTy),
5259           BaseAddrLVal);
5260       // deps[i].len = sizeof(<Dependences[i].second>);
5261       LValue LenLVal = CGF.EmitLValueForField(
5262           Base, *std::next(KmpDependInfoRD->field_begin(), Len));
5263       CGF.EmitStoreOfScalar(Size, LenLVal);
5264       // deps[i].flags = <Dependences[i].first>;
5265       RTLDependenceKindTy DepKind;
5266       switch (Data.Dependences[I].first) {
5267       case OMPC_DEPEND_in:
5268         DepKind = DepIn;
5269         break;
5270       // Out and InOut dependencies must use the same code.
5271       case OMPC_DEPEND_out:
5272       case OMPC_DEPEND_inout:
5273         DepKind = DepInOut;
5274         break;
5275       case OMPC_DEPEND_mutexinoutset:
5276         DepKind = DepMutexInOutSet;
5277         break;
5278       case OMPC_DEPEND_source:
5279       case OMPC_DEPEND_sink:
5280       case OMPC_DEPEND_unknown:
5281         llvm_unreachable("Unknown task dependence type");
5282       }
5283       LValue FlagsLVal = CGF.EmitLValueForField(
5284           Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5285       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5286                             FlagsLVal);
5287     }
5288     DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5289         CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), CGF.VoidPtrTy);
5290   }
5291 
5292   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5293   // libcall.
5294   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5295   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5296   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
5297   // list is not empty
5298   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5299   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5300   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5301   llvm::Value *DepTaskArgs[7];
5302   if (NumDependencies) {
5303     DepTaskArgs[0] = UpLoc;
5304     DepTaskArgs[1] = ThreadID;
5305     DepTaskArgs[2] = NewTask;
5306     DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
5307     DepTaskArgs[4] = DependenciesArray.getPointer();
5308     DepTaskArgs[5] = CGF.Builder.getInt32(0);
5309     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5310   }
5311   auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
5312                         &TaskArgs,
5313                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5314     if (!Data.Tied) {
5315       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5316       LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5317       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5318     }
5319     if (NumDependencies) {
5320       CGF.EmitRuntimeCall(
5321           createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
5322     } else {
5323       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
5324                           TaskArgs);
5325     }
5326     // Check if parent region is untied and build return for untied task;
5327     if (auto *Region =
5328             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5329       Region->emitUntiedSwitch(CGF);
5330   };
5331 
5332   llvm::Value *DepWaitTaskArgs[6];
5333   if (NumDependencies) {
5334     DepWaitTaskArgs[0] = UpLoc;
5335     DepWaitTaskArgs[1] = ThreadID;
5336     DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
5337     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5338     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5339     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5340   }
5341   auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
5342                         NumDependencies, &DepWaitTaskArgs,
5343                         Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5344     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5345     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5346     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5347     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5348     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5349     // is specified.
5350     if (NumDependencies)
5351       CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
5352                           DepWaitTaskArgs);
5353     // Call proxy_task_entry(gtid, new_task);
5354     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
5355                       Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
5356       Action.Enter(CGF);
5357       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
5358       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
5359                                                           OutlinedFnArgs);
5360     };
5361 
5362     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
5363     // kmp_task_t *new_task);
5364     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
5365     // kmp_task_t *new_task);
5366     RegionCodeGenTy RCG(CodeGen);
5367     CommonActionTy Action(
5368         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
5369         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
5370     RCG.setAction(Action);
5371     RCG(CGF);
5372   };
5373 
5374   if (IfCond) {
5375     emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
5376   } else {
5377     RegionCodeGenTy ThenRCG(ThenCodeGen);
5378     ThenRCG(CGF);
5379   }
5380 }
5381 
5382 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
5383                                        const OMPLoopDirective &D,
5384                                        llvm::Function *TaskFunction,
5385                                        QualType SharedsTy, Address Shareds,
5386                                        const Expr *IfCond,
5387                                        const OMPTaskDataTy &Data) {
5388   if (!CGF.HaveInsertPoint())
5389     return;
5390   TaskResultTy Result =
5391       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5392   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5393   // libcall.
5394   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
5395   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
5396   // sched, kmp_uint64 grainsize, void *task_dup);
5397   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5398   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5399   llvm::Value *IfVal;
5400   if (IfCond) {
5401     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
5402                                       /*isSigned=*/true);
5403   } else {
5404     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
5405   }
5406 
5407   LValue LBLVal = CGF.EmitLValueForField(
5408       Result.TDBase,
5409       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
5410   const auto *LBVar =
5411       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
5412   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
5413                        LBLVal.getQuals(),
5414                        /*IsInitializer=*/true);
5415   LValue UBLVal = CGF.EmitLValueForField(
5416       Result.TDBase,
5417       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
5418   const auto *UBVar =
5419       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
5420   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
5421                        UBLVal.getQuals(),
5422                        /*IsInitializer=*/true);
5423   LValue StLVal = CGF.EmitLValueForField(
5424       Result.TDBase,
5425       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
5426   const auto *StVar =
5427       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
5428   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
5429                        StLVal.getQuals(),
5430                        /*IsInitializer=*/true);
5431   // Store reductions address.
5432   LValue RedLVal = CGF.EmitLValueForField(
5433       Result.TDBase,
5434       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5435   if (Data.Reductions) {
5436     CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5437   } else {
5438     CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
5439                                CGF.getContext().VoidPtrTy);
5440   }
5441   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
5442   llvm::Value *TaskArgs[] = {
5443       UpLoc,
5444       ThreadID,
5445       Result.NewTask,
5446       IfVal,
5447       LBLVal.getPointer(CGF),
5448       UBLVal.getPointer(CGF),
5449       CGF.EmitLoadOfScalar(StLVal, Loc),
5450       llvm::ConstantInt::getSigned(
5451           CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
5452       llvm::ConstantInt::getSigned(
5453           CGF.IntTy, Data.Schedule.getPointer()
5454                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
5455                          : NoSchedule),
5456       Data.Schedule.getPointer()
5457           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
5458                                       /*isSigned=*/false)
5459           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
5460       Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5461                              Result.TaskDupFn, CGF.VoidPtrTy)
5462                        : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
5463   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
5464 }
5465 
5466 /// Emit reduction operation for each element of array (required for
5467 /// array sections) LHS op = RHS.
5468 /// \param Type Type of array.
5469 /// \param LHSVar Variable on the left side of the reduction operation
5470 /// (references element of array in original variable).
5471 /// \param RHSVar Variable on the right side of the reduction operation
5472 /// (references element of array in original variable).
5473 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5474 /// RHSVar.
5475 static void EmitOMPAggregateReduction(
5476     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
5477     const VarDecl *RHSVar,
5478     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
5479                                   const Expr *, const Expr *)> &RedOpGen,
5480     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
5481     const Expr *UpExpr = nullptr) {
5482   // Perform element-by-element initialization.
5483   QualType ElementTy;
5484   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
5485   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
5486 
5487   // Drill down to the base element type on both arrays.
5488   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
5489   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
5490 
5491   llvm::Value *RHSBegin = RHSAddr.getPointer();
5492   llvm::Value *LHSBegin = LHSAddr.getPointer();
5493   // Cast from pointer to array type to pointer to single element.
5494   llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
5495   // The basic structure here is a while-do loop.
5496   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
5497   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
5498   llvm::Value *IsEmpty =
5499       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
5500   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5501 
5502   // Enter the loop body, making that address the current address.
5503   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5504   CGF.EmitBlock(BodyBB);
5505 
5506   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
5507 
5508   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
5509       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
5510   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
5511   Address RHSElementCurrent =
5512       Address(RHSElementPHI,
5513               RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5514 
5515   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
5516       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
5517   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
5518   Address LHSElementCurrent =
5519       Address(LHSElementPHI,
5520               LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5521 
5522   // Emit copy.
5523   CodeGenFunction::OMPPrivateScope Scope(CGF);
5524   Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
5525   Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
5526   Scope.Privatize();
5527   RedOpGen(CGF, XExpr, EExpr, UpExpr);
5528   Scope.ForceCleanup();
5529 
5530   // Shift the address forward by one element.
5531   llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
5532       LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
5533   llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
5534       RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
5535   // Check whether we've reached the end.
5536   llvm::Value *Done =
5537       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
5538   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
5539   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
5540   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
5541 
5542   // Done.
5543   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5544 }
5545 
5546 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5547 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5548 /// UDR combiner function.
5549 static void emitReductionCombiner(CodeGenFunction &CGF,
5550                                   const Expr *ReductionOp) {
5551   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5552     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5553       if (const auto *DRE =
5554               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5555         if (const auto *DRD =
5556                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5557           std::pair<llvm::Function *, llvm::Function *> Reduction =
5558               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5559           RValue Func = RValue::get(Reduction.first);
5560           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5561           CGF.EmitIgnoredExpr(ReductionOp);
5562           return;
5563         }
5564   CGF.EmitIgnoredExpr(ReductionOp);
5565 }
5566 
5567 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
5568     SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
5569     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
5570     ArrayRef<const Expr *> ReductionOps) {
5571   ASTContext &C = CGM.getContext();
5572 
5573   // void reduction_func(void *LHSArg, void *RHSArg);
5574   FunctionArgList Args;
5575   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5576                            ImplicitParamDecl::Other);
5577   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5578                            ImplicitParamDecl::Other);
5579   Args.push_back(&LHSArg);
5580   Args.push_back(&RHSArg);
5581   const auto &CGFI =
5582       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5583   std::string Name = getName({"omp", "reduction", "reduction_func"});
5584   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
5585                                     llvm::GlobalValue::InternalLinkage, Name,
5586                                     &CGM.getModule());
5587   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
5588   Fn->setDoesNotRecurse();
5589   CodeGenFunction CGF(CGM);
5590   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5591 
5592   // Dst = (void*[n])(LHSArg);
5593   // Src = (void*[n])(RHSArg);
5594   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5595       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
5596       ArgsType), CGF.getPointerAlign());
5597   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5598       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
5599       ArgsType), CGF.getPointerAlign());
5600 
5601   //  ...
5602   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5603   //  ...
5604   CodeGenFunction::OMPPrivateScope Scope(CGF);
5605   auto IPriv = Privates.begin();
5606   unsigned Idx = 0;
5607   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5608     const auto *RHSVar =
5609         cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5610     Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
5611       return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
5612     });
5613     const auto *LHSVar =
5614         cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5615     Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
5616       return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
5617     });
5618     QualType PrivTy = (*IPriv)->getType();
5619     if (PrivTy->isVariablyModifiedType()) {
5620       // Get array size and emit VLA type.
5621       ++Idx;
5622       Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5623       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5624       const VariableArrayType *VLA =
5625           CGF.getContext().getAsVariableArrayType(PrivTy);
5626       const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5627       CodeGenFunction::OpaqueValueMapping OpaqueMap(
5628           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5629       CGF.EmitVariablyModifiedType(PrivTy);
5630     }
5631   }
5632   Scope.Privatize();
5633   IPriv = Privates.begin();
5634   auto ILHS = LHSExprs.begin();
5635   auto IRHS = RHSExprs.begin();
5636   for (const Expr *E : ReductionOps) {
5637     if ((*IPriv)->getType()->isArrayType()) {
5638       // Emit reduction for array section.
5639       const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5640       const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5641       EmitOMPAggregateReduction(
5642           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5643           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5644             emitReductionCombiner(CGF, E);
5645           });
5646     } else {
5647       // Emit reduction for array subscript or single variable.
5648       emitReductionCombiner(CGF, E);
5649     }
5650     ++IPriv;
5651     ++ILHS;
5652     ++IRHS;
5653   }
5654   Scope.ForceCleanup();
5655   CGF.FinishFunction();
5656   return Fn;
5657 }
5658 
5659 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5660                                                   const Expr *ReductionOp,
5661                                                   const Expr *PrivateRef,
5662                                                   const DeclRefExpr *LHS,
5663                                                   const DeclRefExpr *RHS) {
5664   if (PrivateRef->getType()->isArrayType()) {
5665     // Emit reduction for array section.
5666     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5667     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5668     EmitOMPAggregateReduction(
5669         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5670         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5671           emitReductionCombiner(CGF, ReductionOp);
5672         });
5673   } else {
5674     // Emit reduction for array subscript or single variable.
5675     emitReductionCombiner(CGF, ReductionOp);
5676   }
5677 }
5678 
5679 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5680                                     ArrayRef<const Expr *> Privates,
5681                                     ArrayRef<const Expr *> LHSExprs,
5682                                     ArrayRef<const Expr *> RHSExprs,
5683                                     ArrayRef<const Expr *> ReductionOps,
5684                                     ReductionOptionsTy Options) {
5685   if (!CGF.HaveInsertPoint())
5686     return;
5687 
5688   bool WithNowait = Options.WithNowait;
5689   bool SimpleReduction = Options.SimpleReduction;
5690 
5691   // Next code should be emitted for reduction:
5692   //
5693   // static kmp_critical_name lock = { 0 };
5694   //
5695   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5696   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5697   //  ...
5698   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5699   //  *(Type<n>-1*)rhs[<n>-1]);
5700   // }
5701   //
5702   // ...
5703   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5704   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5705   // RedList, reduce_func, &<lock>)) {
5706   // case 1:
5707   //  ...
5708   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5709   //  ...
5710   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5711   // break;
5712   // case 2:
5713   //  ...
5714   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5715   //  ...
5716   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5717   // break;
5718   // default:;
5719   // }
5720   //
5721   // if SimpleReduction is true, only the next code is generated:
5722   //  ...
5723   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5724   //  ...
5725 
5726   ASTContext &C = CGM.getContext();
5727 
5728   if (SimpleReduction) {
5729     CodeGenFunction::RunCleanupsScope Scope(CGF);
5730     auto IPriv = Privates.begin();
5731     auto ILHS = LHSExprs.begin();
5732     auto IRHS = RHSExprs.begin();
5733     for (const Expr *E : ReductionOps) {
5734       emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5735                                   cast<DeclRefExpr>(*IRHS));
5736       ++IPriv;
5737       ++ILHS;
5738       ++IRHS;
5739     }
5740     return;
5741   }
5742 
5743   // 1. Build a list of reduction variables.
5744   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5745   auto Size = RHSExprs.size();
5746   for (const Expr *E : Privates) {
5747     if (E->getType()->isVariablyModifiedType())
5748       // Reserve place for array size.
5749       ++Size;
5750   }
5751   llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5752   QualType ReductionArrayTy =
5753       C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
5754                              /*IndexTypeQuals=*/0);
5755   Address ReductionList =
5756       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5757   auto IPriv = Privates.begin();
5758   unsigned Idx = 0;
5759   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5760     Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5761     CGF.Builder.CreateStore(
5762         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5763             CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5764         Elem);
5765     if ((*IPriv)->getType()->isVariablyModifiedType()) {
5766       // Store array size.
5767       ++Idx;
5768       Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5769       llvm::Value *Size = CGF.Builder.CreateIntCast(
5770           CGF.getVLASize(
5771                  CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5772               .NumElts,
5773           CGF.SizeTy, /*isSigned=*/false);
5774       CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5775                               Elem);
5776     }
5777   }
5778 
5779   // 2. Emit reduce_func().
5780   llvm::Function *ReductionFn = emitReductionFunction(
5781       Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
5782       LHSExprs, RHSExprs, ReductionOps);
5783 
5784   // 3. Create static kmp_critical_name lock = { 0 };
5785   std::string Name = getName({"reduction"});
5786   llvm::Value *Lock = getCriticalRegionLock(Name);
5787 
5788   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5789   // RedList, reduce_func, &<lock>);
5790   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5791   llvm::Value *ThreadId = getThreadID(CGF, Loc);
5792   llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5793   llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5794       ReductionList.getPointer(), CGF.VoidPtrTy);
5795   llvm::Value *Args[] = {
5796       IdentTLoc,                             // ident_t *<loc>
5797       ThreadId,                              // i32 <gtid>
5798       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5799       ReductionArrayTySize,                  // size_type sizeof(RedList)
5800       RL,                                    // void *RedList
5801       ReductionFn, // void (*) (void *, void *) <reduce_func>
5802       Lock         // kmp_critical_name *&<lock>
5803   };
5804   llvm::Value *Res = CGF.EmitRuntimeCall(
5805       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
5806                                        : OMPRTL__kmpc_reduce),
5807       Args);
5808 
5809   // 5. Build switch(res)
5810   llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5811   llvm::SwitchInst *SwInst =
5812       CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5813 
5814   // 6. Build case 1:
5815   //  ...
5816   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5817   //  ...
5818   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5819   // break;
5820   llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5821   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5822   CGF.EmitBlock(Case1BB);
5823 
5824   // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5825   llvm::Value *EndArgs[] = {
5826       IdentTLoc, // ident_t *<loc>
5827       ThreadId,  // i32 <gtid>
5828       Lock       // kmp_critical_name *&<lock>
5829   };
5830   auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5831                        CodeGenFunction &CGF, PrePostActionTy &Action) {
5832     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5833     auto IPriv = Privates.begin();
5834     auto ILHS = LHSExprs.begin();
5835     auto IRHS = RHSExprs.begin();
5836     for (const Expr *E : ReductionOps) {
5837       RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5838                                      cast<DeclRefExpr>(*IRHS));
5839       ++IPriv;
5840       ++ILHS;
5841       ++IRHS;
5842     }
5843   };
5844   RegionCodeGenTy RCG(CodeGen);
5845   CommonActionTy Action(
5846       nullptr, llvm::None,
5847       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
5848                                        : OMPRTL__kmpc_end_reduce),
5849       EndArgs);
5850   RCG.setAction(Action);
5851   RCG(CGF);
5852 
5853   CGF.EmitBranch(DefaultBB);
5854 
5855   // 7. Build case 2:
5856   //  ...
5857   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5858   //  ...
5859   // break;
5860   llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5861   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5862   CGF.EmitBlock(Case2BB);
5863 
5864   auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5865                              CodeGenFunction &CGF, PrePostActionTy &Action) {
5866     auto ILHS = LHSExprs.begin();
5867     auto IRHS = RHSExprs.begin();
5868     auto IPriv = Privates.begin();
5869     for (const Expr *E : ReductionOps) {
5870       const Expr *XExpr = nullptr;
5871       const Expr *EExpr = nullptr;
5872       const Expr *UpExpr = nullptr;
5873       BinaryOperatorKind BO = BO_Comma;
5874       if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5875         if (BO->getOpcode() == BO_Assign) {
5876           XExpr = BO->getLHS();
5877           UpExpr = BO->getRHS();
5878         }
5879       }
5880       // Try to emit update expression as a simple atomic.
5881       const Expr *RHSExpr = UpExpr;
5882       if (RHSExpr) {
5883         // Analyze RHS part of the whole expression.
5884         if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5885                 RHSExpr->IgnoreParenImpCasts())) {
5886           // If this is a conditional operator, analyze its condition for
5887           // min/max reduction operator.
5888           RHSExpr = ACO->getCond();
5889         }
5890         if (const auto *BORHS =
5891                 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5892           EExpr = BORHS->getRHS();
5893           BO = BORHS->getOpcode();
5894         }
5895       }
5896       if (XExpr) {
5897         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5898         auto &&AtomicRedGen = [BO, VD,
5899                                Loc](CodeGenFunction &CGF, const Expr *XExpr,
5900                                     const Expr *EExpr, const Expr *UpExpr) {
5901           LValue X = CGF.EmitLValue(XExpr);
5902           RValue E;
5903           if (EExpr)
5904             E = CGF.EmitAnyExpr(EExpr);
5905           CGF.EmitOMPAtomicSimpleUpdateExpr(
5906               X, E, BO, /*IsXLHSInRHSPart=*/true,
5907               llvm::AtomicOrdering::Monotonic, Loc,
5908               [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5909                 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5910                 PrivateScope.addPrivate(
5911                     VD, [&CGF, VD, XRValue, Loc]() {
5912                       Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5913                       CGF.emitOMPSimpleStore(
5914                           CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5915                           VD->getType().getNonReferenceType(), Loc);
5916                       return LHSTemp;
5917                     });
5918                 (void)PrivateScope.Privatize();
5919                 return CGF.EmitAnyExpr(UpExpr);
5920               });
5921         };
5922         if ((*IPriv)->getType()->isArrayType()) {
5923           // Emit atomic reduction for array section.
5924           const auto *RHSVar =
5925               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5926           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5927                                     AtomicRedGen, XExpr, EExpr, UpExpr);
5928         } else {
5929           // Emit atomic reduction for array subscript or single variable.
5930           AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5931         }
5932       } else {
5933         // Emit as a critical region.
5934         auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5935                                            const Expr *, const Expr *) {
5936           CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5937           std::string Name = RT.getName({"atomic_reduction"});
5938           RT.emitCriticalRegion(
5939               CGF, Name,
5940               [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5941                 Action.Enter(CGF);
5942                 emitReductionCombiner(CGF, E);
5943               },
5944               Loc);
5945         };
5946         if ((*IPriv)->getType()->isArrayType()) {
5947           const auto *LHSVar =
5948               cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5949           const auto *RHSVar =
5950               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5951           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5952                                     CritRedGen);
5953         } else {
5954           CritRedGen(CGF, nullptr, nullptr, nullptr);
5955         }
5956       }
5957       ++ILHS;
5958       ++IRHS;
5959       ++IPriv;
5960     }
5961   };
5962   RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5963   if (!WithNowait) {
5964     // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5965     llvm::Value *EndArgs[] = {
5966         IdentTLoc, // ident_t *<loc>
5967         ThreadId,  // i32 <gtid>
5968         Lock       // kmp_critical_name *&<lock>
5969     };
5970     CommonActionTy Action(nullptr, llvm::None,
5971                           createRuntimeFunction(OMPRTL__kmpc_end_reduce),
5972                           EndArgs);
5973     AtomicRCG.setAction(Action);
5974     AtomicRCG(CGF);
5975   } else {
5976     AtomicRCG(CGF);
5977   }
5978 
5979   CGF.EmitBranch(DefaultBB);
5980   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5981 }
5982 
5983 /// Generates unique name for artificial threadprivate variables.
5984 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5985 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5986                                       const Expr *Ref) {
5987   SmallString<256> Buffer;
5988   llvm::raw_svector_ostream Out(Buffer);
5989   const clang::DeclRefExpr *DE;
5990   const VarDecl *D = ::getBaseDecl(Ref, DE);
5991   if (!D)
5992     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5993   D = D->getCanonicalDecl();
5994   std::string Name = CGM.getOpenMPRuntime().getName(
5995       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5996   Out << Prefix << Name << "_"
5997       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5998   return std::string(Out.str());
5999 }
6000 
6001 /// Emits reduction initializer function:
6002 /// \code
6003 /// void @.red_init(void* %arg) {
6004 /// %0 = bitcast void* %arg to <type>*
6005 /// store <type> <init>, <type>* %0
6006 /// ret void
6007 /// }
6008 /// \endcode
6009 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
6010                                            SourceLocation Loc,
6011                                            ReductionCodeGen &RCG, unsigned N) {
6012   ASTContext &C = CGM.getContext();
6013   FunctionArgList Args;
6014   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6015                           ImplicitParamDecl::Other);
6016   Args.emplace_back(&Param);
6017   const auto &FnInfo =
6018       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6019   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6020   std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
6021   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6022                                     Name, &CGM.getModule());
6023   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6024   Fn->setDoesNotRecurse();
6025   CodeGenFunction CGF(CGM);
6026   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6027   Address PrivateAddr = CGF.EmitLoadOfPointer(
6028       CGF.GetAddrOfLocalVar(&Param),
6029       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6030   llvm::Value *Size = nullptr;
6031   // If the size of the reduction item is non-constant, load it from global
6032   // threadprivate variable.
6033   if (RCG.getSizes(N).second) {
6034     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6035         CGF, CGM.getContext().getSizeType(),
6036         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6037     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6038                                 CGM.getContext().getSizeType(), Loc);
6039   }
6040   RCG.emitAggregateType(CGF, N, Size);
6041   LValue SharedLVal;
6042   // If initializer uses initializer from declare reduction construct, emit a
6043   // pointer to the address of the original reduction item (reuired by reduction
6044   // initializer)
6045   if (RCG.usesReductionInitializer(N)) {
6046     Address SharedAddr =
6047         CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6048             CGF, CGM.getContext().VoidPtrTy,
6049             generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
6050     SharedAddr = CGF.EmitLoadOfPointer(
6051         SharedAddr,
6052         CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
6053     SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
6054   } else {
6055     SharedLVal = CGF.MakeNaturalAlignAddrLValue(
6056         llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
6057         CGM.getContext().VoidPtrTy);
6058   }
6059   // Emit the initializer:
6060   // %0 = bitcast void* %arg to <type>*
6061   // store <type> <init>, <type>* %0
6062   RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
6063                          [](CodeGenFunction &) { return false; });
6064   CGF.FinishFunction();
6065   return Fn;
6066 }
6067 
6068 /// Emits reduction combiner function:
6069 /// \code
6070 /// void @.red_comb(void* %arg0, void* %arg1) {
6071 /// %lhs = bitcast void* %arg0 to <type>*
6072 /// %rhs = bitcast void* %arg1 to <type>*
6073 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
6074 /// store <type> %2, <type>* %lhs
6075 /// ret void
6076 /// }
6077 /// \endcode
6078 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
6079                                            SourceLocation Loc,
6080                                            ReductionCodeGen &RCG, unsigned N,
6081                                            const Expr *ReductionOp,
6082                                            const Expr *LHS, const Expr *RHS,
6083                                            const Expr *PrivateRef) {
6084   ASTContext &C = CGM.getContext();
6085   const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
6086   const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
6087   FunctionArgList Args;
6088   ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
6089                                C.VoidPtrTy, ImplicitParamDecl::Other);
6090   ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6091                             ImplicitParamDecl::Other);
6092   Args.emplace_back(&ParamInOut);
6093   Args.emplace_back(&ParamIn);
6094   const auto &FnInfo =
6095       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6096   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6097   std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
6098   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6099                                     Name, &CGM.getModule());
6100   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6101   Fn->setDoesNotRecurse();
6102   CodeGenFunction CGF(CGM);
6103   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6104   llvm::Value *Size = nullptr;
6105   // If the size of the reduction item is non-constant, load it from global
6106   // threadprivate variable.
6107   if (RCG.getSizes(N).second) {
6108     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6109         CGF, CGM.getContext().getSizeType(),
6110         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6111     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6112                                 CGM.getContext().getSizeType(), Loc);
6113   }
6114   RCG.emitAggregateType(CGF, N, Size);
6115   // Remap lhs and rhs variables to the addresses of the function arguments.
6116   // %lhs = bitcast void* %arg0 to <type>*
6117   // %rhs = bitcast void* %arg1 to <type>*
6118   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6119   PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
6120     // Pull out the pointer to the variable.
6121     Address PtrAddr = CGF.EmitLoadOfPointer(
6122         CGF.GetAddrOfLocalVar(&ParamInOut),
6123         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6124     return CGF.Builder.CreateElementBitCast(
6125         PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
6126   });
6127   PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
6128     // Pull out the pointer to the variable.
6129     Address PtrAddr = CGF.EmitLoadOfPointer(
6130         CGF.GetAddrOfLocalVar(&ParamIn),
6131         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6132     return CGF.Builder.CreateElementBitCast(
6133         PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
6134   });
6135   PrivateScope.Privatize();
6136   // Emit the combiner body:
6137   // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
6138   // store <type> %2, <type>* %lhs
6139   CGM.getOpenMPRuntime().emitSingleReductionCombiner(
6140       CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
6141       cast<DeclRefExpr>(RHS));
6142   CGF.FinishFunction();
6143   return Fn;
6144 }
6145 
6146 /// Emits reduction finalizer function:
6147 /// \code
6148 /// void @.red_fini(void* %arg) {
6149 /// %0 = bitcast void* %arg to <type>*
6150 /// <destroy>(<type>* %0)
6151 /// ret void
6152 /// }
6153 /// \endcode
6154 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
6155                                            SourceLocation Loc,
6156                                            ReductionCodeGen &RCG, unsigned N) {
6157   if (!RCG.needCleanups(N))
6158     return nullptr;
6159   ASTContext &C = CGM.getContext();
6160   FunctionArgList Args;
6161   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6162                           ImplicitParamDecl::Other);
6163   Args.emplace_back(&Param);
6164   const auto &FnInfo =
6165       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6166   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6167   std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
6168   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6169                                     Name, &CGM.getModule());
6170   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6171   Fn->setDoesNotRecurse();
6172   CodeGenFunction CGF(CGM);
6173   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6174   Address PrivateAddr = CGF.EmitLoadOfPointer(
6175       CGF.GetAddrOfLocalVar(&Param),
6176       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6177   llvm::Value *Size = nullptr;
6178   // If the size of the reduction item is non-constant, load it from global
6179   // threadprivate variable.
6180   if (RCG.getSizes(N).second) {
6181     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6182         CGF, CGM.getContext().getSizeType(),
6183         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6184     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6185                                 CGM.getContext().getSizeType(), Loc);
6186   }
6187   RCG.emitAggregateType(CGF, N, Size);
6188   // Emit the finalizer body:
6189   // <destroy>(<type>* %0)
6190   RCG.emitCleanups(CGF, N, PrivateAddr);
6191   CGF.FinishFunction(Loc);
6192   return Fn;
6193 }
6194 
6195 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
6196     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
6197     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
6198   if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
6199     return nullptr;
6200 
6201   // Build typedef struct:
6202   // kmp_task_red_input {
6203   //   void *reduce_shar; // shared reduction item
6204   //   size_t reduce_size; // size of data item
6205   //   void *reduce_init; // data initialization routine
6206   //   void *reduce_fini; // data finalization routine
6207   //   void *reduce_comb; // data combiner routine
6208   //   kmp_task_red_flags_t flags; // flags for additional info from compiler
6209   // } kmp_task_red_input_t;
6210   ASTContext &C = CGM.getContext();
6211   RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t");
6212   RD->startDefinition();
6213   const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6214   const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
6215   const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6216   const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6217   const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6218   const FieldDecl *FlagsFD = addFieldToRecordDecl(
6219       C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
6220   RD->completeDefinition();
6221   QualType RDType = C.getRecordType(RD);
6222   unsigned Size = Data.ReductionVars.size();
6223   llvm::APInt ArraySize(/*numBits=*/64, Size);
6224   QualType ArrayRDType = C.getConstantArrayType(
6225       RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
6226   // kmp_task_red_input_t .rd_input.[Size];
6227   Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
6228   ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
6229                        Data.ReductionOps);
6230   for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
6231     // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
6232     llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
6233                            llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
6234     llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
6235         TaskRedInput.getPointer(), Idxs,
6236         /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
6237         ".rd_input.gep.");
6238     LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
6239     // ElemLVal.reduce_shar = &Shareds[Cnt];
6240     LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
6241     RCG.emitSharedLValue(CGF, Cnt);
6242     llvm::Value *CastedShared =
6243         CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
6244     CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
6245     RCG.emitAggregateType(CGF, Cnt);
6246     llvm::Value *SizeValInChars;
6247     llvm::Value *SizeVal;
6248     std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
6249     // We use delayed creation/initialization for VLAs, array sections and
6250     // custom reduction initializations. It is required because runtime does not
6251     // provide the way to pass the sizes of VLAs/array sections to
6252     // initializer/combiner/finalizer functions and does not pass the pointer to
6253     // original reduction item to the initializer. Instead threadprivate global
6254     // variables are used to store these values and use them in the functions.
6255     bool DelayedCreation = !!SizeVal;
6256     SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
6257                                                /*isSigned=*/false);
6258     LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
6259     CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
6260     // ElemLVal.reduce_init = init;
6261     LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
6262     llvm::Value *InitAddr =
6263         CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
6264     CGF.EmitStoreOfScalar(InitAddr, InitLVal);
6265     DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
6266     // ElemLVal.reduce_fini = fini;
6267     LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
6268     llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
6269     llvm::Value *FiniAddr = Fini
6270                                 ? CGF.EmitCastToVoidPtr(Fini)
6271                                 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
6272     CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
6273     // ElemLVal.reduce_comb = comb;
6274     LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
6275     llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
6276         CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
6277         RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
6278     CGF.EmitStoreOfScalar(CombAddr, CombLVal);
6279     // ElemLVal.flags = 0;
6280     LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
6281     if (DelayedCreation) {
6282       CGF.EmitStoreOfScalar(
6283           llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
6284           FlagsLVal);
6285     } else
6286       CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
6287                                  FlagsLVal.getType());
6288   }
6289   // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
6290   // *data);
6291   llvm::Value *Args[] = {
6292       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6293                                 /*isSigned=*/true),
6294       llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6295       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
6296                                                       CGM.VoidPtrTy)};
6297   return CGF.EmitRuntimeCall(
6298       createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
6299 }
6300 
6301 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6302                                               SourceLocation Loc,
6303                                               ReductionCodeGen &RCG,
6304                                               unsigned N) {
6305   auto Sizes = RCG.getSizes(N);
6306   // Emit threadprivate global variable if the type is non-constant
6307   // (Sizes.second = nullptr).
6308   if (Sizes.second) {
6309     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6310                                                      /*isSigned=*/false);
6311     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6312         CGF, CGM.getContext().getSizeType(),
6313         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6314     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6315   }
6316   // Store address of the original reduction item if custom initializer is used.
6317   if (RCG.usesReductionInitializer(N)) {
6318     Address SharedAddr = getAddrOfArtificialThreadPrivate(
6319         CGF, CGM.getContext().VoidPtrTy,
6320         generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
6321     CGF.Builder.CreateStore(
6322         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6323             RCG.getSharedLValue(N).getPointer(CGF), CGM.VoidPtrTy),
6324         SharedAddr, /*IsVolatile=*/false);
6325   }
6326 }
6327 
6328 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6329                                               SourceLocation Loc,
6330                                               llvm::Value *ReductionsPtr,
6331                                               LValue SharedLVal) {
6332   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6333   // *d);
6334   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6335                                                    CGM.IntTy,
6336                                                    /*isSigned=*/true),
6337                          ReductionsPtr,
6338                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6339                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6340   return Address(
6341       CGF.EmitRuntimeCall(
6342           createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
6343       SharedLVal.getAlignment());
6344 }
6345 
6346 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6347                                        SourceLocation Loc) {
6348   if (!CGF.HaveInsertPoint())
6349     return;
6350 
6351   llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
6352   if (OMPBuilder) {
6353     OMPBuilder->CreateTaskwait(CGF.Builder);
6354   } else {
6355     // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6356     // global_tid);
6357     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6358     // Ignore return result until untied tasks are supported.
6359     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
6360   }
6361 
6362   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6363     Region->emitUntiedSwitch(CGF);
6364 }
6365 
6366 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6367                                            OpenMPDirectiveKind InnerKind,
6368                                            const RegionCodeGenTy &CodeGen,
6369                                            bool HasCancel) {
6370   if (!CGF.HaveInsertPoint())
6371     return;
6372   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
6373   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6374 }
6375 
namespace {
/// Cancellation kinds produced by getCancellationKind() and passed as the
/// 'cncl_kind' argument of the __kmpc_cancel and __kmpc_cancellationpoint
/// runtime calls.
enum RTCancelKind {
  /// Placeholder used before a kind is selected in getCancellationKind().
  CancelNoreq = 0,
  /// Selected for OMPD_parallel.
  CancelParallel = 1,
  /// Selected for OMPD_for.
  CancelLoop = 2,
  /// Selected for OMPD_sections.
  CancelSections = 3,
  /// Selected for OMPD_taskgroup.
  CancelTaskgroup = 4
};
} // anonymous namespace
6385 
6386 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6387   RTCancelKind CancelKind = CancelNoreq;
6388   if (CancelRegion == OMPD_parallel)
6389     CancelKind = CancelParallel;
6390   else if (CancelRegion == OMPD_for)
6391     CancelKind = CancelLoop;
6392   else if (CancelRegion == OMPD_sections)
6393     CancelKind = CancelSections;
6394   else {
6395     assert(CancelRegion == OMPD_taskgroup);
6396     CancelKind = CancelTaskgroup;
6397   }
6398   return CancelKind;
6399 }
6400 
6401 void CGOpenMPRuntime::emitCancellationPointCall(
6402     CodeGenFunction &CGF, SourceLocation Loc,
6403     OpenMPDirectiveKind CancelRegion) {
6404   if (!CGF.HaveInsertPoint())
6405     return;
6406   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6407   // global_tid, kmp_int32 cncl_kind);
6408   if (auto *OMPRegionInfo =
6409           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6410     // For 'cancellation point taskgroup', the task region info may not have a
6411     // cancel. This may instead happen in another adjacent task.
6412     if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6413       llvm::Value *Args[] = {
6414           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6415           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6416       // Ignore return result until untied tasks are supported.
6417       llvm::Value *Result = CGF.EmitRuntimeCall(
6418           createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
6419       // if (__kmpc_cancellationpoint()) {
6420       //   exit from construct;
6421       // }
6422       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6423       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6424       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6425       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6426       CGF.EmitBlock(ExitBB);
6427       // exit from construct;
6428       CodeGenFunction::JumpDest CancelDest =
6429           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6430       CGF.EmitBranchThroughCleanup(CancelDest);
6431       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6432     }
6433   }
6434 }
6435 
6436 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6437                                      const Expr *IfCond,
6438                                      OpenMPDirectiveKind CancelRegion) {
6439   if (!CGF.HaveInsertPoint())
6440     return;
6441   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6442   // kmp_int32 cncl_kind);
6443   if (auto *OMPRegionInfo =
6444           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6445     auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
6446                                                         PrePostActionTy &) {
6447       CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6448       llvm::Value *Args[] = {
6449           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6450           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6451       // Ignore return result until untied tasks are supported.
6452       llvm::Value *Result = CGF.EmitRuntimeCall(
6453           RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
6454       // if (__kmpc_cancel()) {
6455       //   exit from construct;
6456       // }
6457       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6458       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6459       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6460       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6461       CGF.EmitBlock(ExitBB);
6462       // exit from construct;
6463       CodeGenFunction::JumpDest CancelDest =
6464           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6465       CGF.EmitBranchThroughCleanup(CancelDest);
6466       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6467     };
6468     if (IfCond) {
6469       emitIfClause(CGF, IfCond, ThenGen,
6470                    [](CodeGenFunction &, PrePostActionTy &) {});
6471     } else {
6472       RegionCodeGenTy ThenRCG(ThenGen);
6473       ThenRCG(CGF);
6474     }
6475   }
6476 }
6477 
/// Records that a target region has been emitted and forwards all arguments
/// to emitTargetOutlinedFunctionHelper(). \p ParentName must not be empty.
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  // Set before delegating, regardless of whether this is an offload entry.
  HasEmittedTargetRegion = true;
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}
6487 
6488 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6489     const OMPExecutableDirective &D, StringRef ParentName,
6490     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6491     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6492   // Create a unique name for the entry function using the source location
6493   // information of the current target region. The name will be something like:
6494   //
6495   // __omp_offloading_DD_FFFF_PP_lBB
6496   //
6497   // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
6498   // mangled name of the function that encloses the target region and BB is the
6499   // line number of the target region.
6500 
6501   unsigned DeviceID;
6502   unsigned FileID;
6503   unsigned Line;
6504   getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
6505                            Line);
6506   SmallString<64> EntryFnName;
6507   {
6508     llvm::raw_svector_ostream OS(EntryFnName);
6509     OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
6510        << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
6511   }
6512 
6513   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6514 
6515   CodeGenFunction CGF(CGM, true);
6516   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6517   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6518 
6519   OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
6520 
6521   // If this target outline function is not an offload entry, we don't need to
6522   // register it.
6523   if (!IsOffloadEntry)
6524     return;
6525 
6526   // The target region ID is used by the runtime library to identify the current
6527   // target region, so it only has to be unique and not necessarily point to
6528   // anything. It could be the pointer to the outlined function that implements
6529   // the target region, but we aren't using that so that the compiler doesn't
6530   // need to keep that, and could therefore inline the host function if proven
6531   // worthwhile during optimization. In the other hand, if emitting code for the
6532   // device, the ID has to be the function address so that it can retrieved from
6533   // the offloading entry and launched by the runtime library. We also mark the
6534   // outlined function to have external linkage in case we are emitting code for
6535   // the device, because these functions will be entry points to the device.
6536 
6537   if (CGM.getLangOpts().OpenMPIsDevice) {
6538     OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
6539     OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
6540     OutlinedFn->setDSOLocal(false);
6541   } else {
6542     std::string Name = getName({EntryFnName, "region_id"});
6543     OutlinedFnID = new llvm::GlobalVariable(
6544         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
6545         llvm::GlobalValue::WeakAnyLinkage,
6546         llvm::Constant::getNullValue(CGM.Int8Ty), Name);
6547   }
6548 
6549   // Register the information for the entry associated with this target region.
6550   OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
6551       DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
6552       OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
6553 }
6554 
6555 /// Checks if the expression is constant or does not have non-trivial function
6556 /// calls.
6557 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6558   // We can skip constant expressions.
6559   // We can skip expressions with trivial calls or simple expressions.
6560   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6561           !E->hasNonTrivialCall(Ctx)) &&
6562          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6563 }
6564 
6565 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6566                                                     const Stmt *Body) {
6567   const Stmt *Child = Body->IgnoreContainers();
6568   while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6569     Child = nullptr;
6570     for (const Stmt *S : C->body()) {
6571       if (const auto *E = dyn_cast<Expr>(S)) {
6572         if (isTrivial(Ctx, E))
6573           continue;
6574       }
6575       // Some of the statements can be ignored.
6576       if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6577           isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6578         continue;
6579       // Analyze declarations.
6580       if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6581         if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
6582               if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6583                   isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6584                   isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6585                   isa<UsingDirectiveDecl>(D) ||
6586                   isa<OMPDeclareReductionDecl>(D) ||
6587                   isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6588                 return true;
6589               const auto *VD = dyn_cast<VarDecl>(D);
6590               if (!VD)
6591                 return false;
6592               return VD->isConstexpr() ||
6593                      ((VD->getType().isTrivialType(Ctx) ||
6594                        VD->getType()->isReferenceType()) &&
6595                       (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
6596             }))
6597           continue;
6598       }
6599       // Found multiple children - cannot get the one child only.
6600       if (Child)
6601         return nullptr;
6602       Child = S;
6603     }
6604     if (Child)
6605       Child = Child->IgnoreContainers();
6606   }
6607   return Child;
6608 }
6609 
6610 /// Emit the number of teams for a target directive.  Inspect the num_teams
6611 /// clause associated with a teams construct combined or closely nested
6612 /// with the target directive.
6613 ///
6614 /// Emit a team of size one for directives such as 'target parallel' that
6615 /// have no associated teams construct.
6616 ///
6617 /// Otherwise, return nullptr.
6618 static llvm::Value *
6619 emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
6620                                const OMPExecutableDirective &D) {
6621   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6622          "Clauses associated with the teams directive expected to be emitted "
6623          "only for the host!");
6624   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6625   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6626          "Expected target-based executable directive.");
6627   CGBuilderTy &Bld = CGF.Builder;
6628   switch (DirectiveKind) {
6629   case OMPD_target: {
6630     const auto *CS = D.getInnermostCapturedStmt();
6631     const auto *Body =
6632         CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6633     const Stmt *ChildStmt =
6634         CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6635     if (const auto *NestedDir =
6636             dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6637       if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6638         if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6639           CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6640           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6641           const Expr *NumTeams =
6642               NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6643           llvm::Value *NumTeamsVal =
6644               CGF.EmitScalarExpr(NumTeams,
6645                                  /*IgnoreResultAssign*/ true);
6646           return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6647                                    /*isSigned=*/true);
6648         }
6649         return Bld.getInt32(0);
6650       }
6651       if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6652           isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
6653         return Bld.getInt32(1);
6654       return Bld.getInt32(0);
6655     }
6656     return nullptr;
6657   }
6658   case OMPD_target_teams:
6659   case OMPD_target_teams_distribute:
6660   case OMPD_target_teams_distribute_simd:
6661   case OMPD_target_teams_distribute_parallel_for:
6662   case OMPD_target_teams_distribute_parallel_for_simd: {
6663     if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6664       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6665       const Expr *NumTeams =
6666           D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6667       llvm::Value *NumTeamsVal =
6668           CGF.EmitScalarExpr(NumTeams,
6669                              /*IgnoreResultAssign*/ true);
6670       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6671                                /*isSigned=*/true);
6672     }
6673     return Bld.getInt32(0);
6674   }
6675   case OMPD_target_parallel:
6676   case OMPD_target_parallel_for:
6677   case OMPD_target_parallel_for_simd:
6678   case OMPD_target_simd:
6679     return Bld.getInt32(1);
6680   case OMPD_parallel:
6681   case OMPD_for:
6682   case OMPD_parallel_for:
6683   case OMPD_parallel_master:
6684   case OMPD_parallel_sections:
6685   case OMPD_for_simd:
6686   case OMPD_parallel_for_simd:
6687   case OMPD_cancel:
6688   case OMPD_cancellation_point:
6689   case OMPD_ordered:
6690   case OMPD_threadprivate:
6691   case OMPD_allocate:
6692   case OMPD_task:
6693   case OMPD_simd:
6694   case OMPD_sections:
6695   case OMPD_section:
6696   case OMPD_single:
6697   case OMPD_master:
6698   case OMPD_critical:
6699   case OMPD_taskyield:
6700   case OMPD_barrier:
6701   case OMPD_taskwait:
6702   case OMPD_taskgroup:
6703   case OMPD_atomic:
6704   case OMPD_flush:
6705   case OMPD_teams:
6706   case OMPD_target_data:
6707   case OMPD_target_exit_data:
6708   case OMPD_target_enter_data:
6709   case OMPD_distribute:
6710   case OMPD_distribute_simd:
6711   case OMPD_distribute_parallel_for:
6712   case OMPD_distribute_parallel_for_simd:
6713   case OMPD_teams_distribute:
6714   case OMPD_teams_distribute_simd:
6715   case OMPD_teams_distribute_parallel_for:
6716   case OMPD_teams_distribute_parallel_for_simd:
6717   case OMPD_target_update:
6718   case OMPD_declare_simd:
6719   case OMPD_declare_variant:
6720   case OMPD_declare_target:
6721   case OMPD_end_declare_target:
6722   case OMPD_declare_reduction:
6723   case OMPD_declare_mapper:
6724   case OMPD_taskloop:
6725   case OMPD_taskloop_simd:
6726   case OMPD_master_taskloop:
6727   case OMPD_master_taskloop_simd:
6728   case OMPD_parallel_master_taskloop:
6729   case OMPD_parallel_master_taskloop_simd:
6730   case OMPD_requires:
6731   case OMPD_unknown:
6732     break;
6733   }
6734   llvm_unreachable("Unexpected directive kind.");
6735 }
6736 
6737 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6738                                   llvm::Value *DefaultThreadLimitVal) {
6739   const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6740       CGF.getContext(), CS->getCapturedStmt());
6741   if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6742     if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6743       llvm::Value *NumThreads = nullptr;
6744       llvm::Value *CondVal = nullptr;
6745       // Handle if clause. If if clause present, the number of threads is
6746       // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6747       if (Dir->hasClausesOfKind<OMPIfClause>()) {
6748         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6749         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6750         const OMPIfClause *IfClause = nullptr;
6751         for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6752           if (C->getNameModifier() == OMPD_unknown ||
6753               C->getNameModifier() == OMPD_parallel) {
6754             IfClause = C;
6755             break;
6756           }
6757         }
6758         if (IfClause) {
6759           const Expr *Cond = IfClause->getCondition();
6760           bool Result;
6761           if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6762             if (!Result)
6763               return CGF.Builder.getInt32(1);
6764           } else {
6765             CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6766             if (const auto *PreInit =
6767                     cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6768               for (const auto *I : PreInit->decls()) {
6769                 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6770                   CGF.EmitVarDecl(cast<VarDecl>(*I));
6771                 } else {
6772                   CodeGenFunction::AutoVarEmission Emission =
6773                       CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6774                   CGF.EmitAutoVarCleanups(Emission);
6775                 }
6776               }
6777             }
6778             CondVal = CGF.EvaluateExprAsBool(Cond);
6779           }
6780         }
6781       }
6782       // Check the value of num_threads clause iff if clause was not specified
6783       // or is not evaluated to false.
6784       if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6785         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6786         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6787         const auto *NumThreadsClause =
6788             Dir->getSingleClause<OMPNumThreadsClause>();
6789         CodeGenFunction::LexicalScope Scope(
6790             CGF, NumThreadsClause->getNumThreads()->getSourceRange());
6791         if (const auto *PreInit =
6792                 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6793           for (const auto *I : PreInit->decls()) {
6794             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6795               CGF.EmitVarDecl(cast<VarDecl>(*I));
6796             } else {
6797               CodeGenFunction::AutoVarEmission Emission =
6798                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6799               CGF.EmitAutoVarCleanups(Emission);
6800             }
6801           }
6802         }
6803         NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
6804         NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
6805                                                /*isSigned=*/false);
6806         if (DefaultThreadLimitVal)
6807           NumThreads = CGF.Builder.CreateSelect(
6808               CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
6809               DefaultThreadLimitVal, NumThreads);
6810       } else {
6811         NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
6812                                            : CGF.Builder.getInt32(0);
6813       }
6814       // Process condition of the if clause.
6815       if (CondVal) {
6816         NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
6817                                               CGF.Builder.getInt32(1));
6818       }
6819       return NumThreads;
6820     }
6821     if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6822       return CGF.Builder.getInt32(1);
6823     return DefaultThreadLimitVal;
6824   }
6825   return DefaultThreadLimitVal ? DefaultThreadLimitVal
6826                                : CGF.Builder.getInt32(0);
6827 }
6828 
/// Emit the number of threads for a target directive.  Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Clause expressions are emitted on the host and converted to unsigned
/// 32-bit integers. When both thread_limit and num_threads are known the
/// smaller of the two is used, an applicable 'if' clause selects between that
/// value and 1, simd-only directives always yield 1, and the constant 0 is
/// produced when no clause provides a value.
///
/// Otherwise, return nullptr. In this function that can only happen in the
/// 'target teams distribute' case, which forwards the result of
/// getNumThreads() unchanged.
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': everything has to be discovered from the directives
    // nested inside the region.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    // A directly nested parallel/simd directive decides the result. No thread
    // limit is known yet, so ThreadLimitVal is still null here.
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // Pick up a thread_limit clause on the nested directive. Its expression
      // lives inside the target region, hence the captured-statement info and
      // the emission of the clause's pre-init declarations.
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              // Only storage and cleanups are emitted for declarations marked
              // with OMPCaptureNoInitAttr; no initializer is emitted.
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // A teams directive that is not combined with distribute: step into its
      // body and continue with the single directive nested there (Dir becomes
      // null if there is none).
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      // A non-simd distribute directive: look for a parallel/simd directive
      // nested in its body, now bounded by the thread limit found above.
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      // Any simd directive reached at this point runs with a single thread.
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    // The thread_limit clause, if any, is on the combined directive itself.
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    // Check for a parallel/simd directive nested directly in the region...
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    // ...or nested inside a plain 'distribute' directive in the region.
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    // NOTE(review): unlike the cases above, the result of getNumThreads() is
    // returned without a null check, so a null value can reach the caller if
    // the region nests a directive that is neither parallel nor simd and no
    // thread_limit clause is present - confirm callers tolerate this.
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // All relevant clauses are on the combined directive itself.
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      // Only an 'if' clause without a name modifier or with the 'parallel'
      // modifier applies to the parallel region.
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Constant condition: false means exactly one thread, true means
          // the clause has no effect.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // From here on ThreadLimitVal holds the effective thread count: the
      // unsigned minimum of num_threads and thread_limit, or num_threads
      // alone when no thread_limit was given.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    // Apply a run-time 'if' condition: one thread when it is false.
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  // The remaining kinds are not target execution directives and are rejected
  // by the assertion above; they are listed to keep the switch exhaustive.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7047 
7048 namespace {
7049 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7050 
7051 // Utility to handle information from clauses associated with a given
7052 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7053 // It provides a convenient interface to obtain the information and generate
7054 // code for that information.
7055 class MappableExprsHandler {
7056 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading. Individual flags are combined with bitwise operators; the
  /// upper 16 bits form a separate MEMBER_OF field rather than a single flag.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// The map was not written by the user but added implicitly.
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// The 16 MSBs of the flags indicate whether the entry is a member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    // Declares the largest enumerator to the bitmask-enum machinery (see
    // BitmaskEnum.h) so that bitwise operators can be used on this enum.
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7097 
7098   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7099   static unsigned getFlagMemberOffset() {
7100     unsigned Offset = 0;
7101     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7102          Remain = Remain >> 1)
7103       Offset++;
7104     return Offset;
7105   }
7106 
7107   /// Class that associates information with a base pointer to be passed to the
7108   /// runtime library.
7109   class BasePointerInfo {
7110     /// The base pointer.
7111     llvm::Value *Ptr = nullptr;
7112     /// The base declaration that refers to this device pointer, or null if
7113     /// there is none.
7114     const ValueDecl *DevPtrDecl = nullptr;
7115 
7116   public:
7117     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7118         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7119     llvm::Value *operator*() const { return Ptr; }
7120     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7121     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7122   };
7123 
7124   using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
7125   using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
7126   using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
7127 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  ///
  /// All addresses start out as Address::invalid() and both field indices
  /// start at 0.
  struct StructRangeInfoTy {
    /// Field index and address of the lowest mapped element.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Field index and address of the highest mapped element.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    // NOTE(review): presumably the address of the enclosing struct object
    // itself - confirm where this field is assigned.
    Address Base = Address::invalid();
  };
7139 
7140 private:
  /// Bundle of the information recorded for one mappable expression: its
  /// component list, the map type and modifiers, whether a device pointer has
  /// to be returned for it, and whether the map is implicit.
  struct MapInfo {
    /// Components of the mappable expression.
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    /// Map type of the entry; OMPC_MAP_unknown until set.
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    /// Map type modifiers. This is an ArrayRef, so the elements are not
    /// copied; NOTE(review): the referenced storage presumably has to outlive
    /// this object - confirm at the construction sites.
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    /// Whether a device pointer has to be returned for this entry.
    bool ReturnDevicePointer = false;
    /// Whether the map is implicit.
    bool IsImplicit = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
  };
7158 
  /// If use_device_ptr is used on a pointer which is a struct member and there
  /// is no map information about it, then emission of that entry is deferred
  /// until the whole struct has been processed. This record holds what is
  /// needed to emit such an entry later.
  struct DeferredDevicePtrEntryTy {
    // NOTE(review): presumably the use_device_ptr list-item expression for
    // the deferred entry - confirm where entries are created.
    const Expr *IE = nullptr;
    // NOTE(review): presumably the declaration of the pointer member named in
    // the clause - confirm where entries are created.
    const ValueDecl *VD = nullptr;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
        : IE(IE), VD(VD) {}
  };
7169 
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
7172   llvm::PointerUnion<const OMPExecutableDirective *,
7173                      const OMPDeclareMapperDecl *>
7174       CurDir;
7175 
7176   /// Function the directive is being generated for.
7177   CodeGenFunction &CGF;
7178 
7179   /// Set of all first private variables in the current directive.
7180   /// bool data is set to true if the variable is implicitly marked as
7181   /// firstprivate, false otherwise.
7182   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
7183 
7184   /// Map between device pointer declarations and their expression components.
7185   /// The key value for declarations in 'this' is null.
7186   llvm::DenseMap<
7187       const ValueDecl *,
7188       SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7189       DevPointersMap;
7190 
7191   llvm::Value *getExprTypeSize(const Expr *E) const {
7192     QualType ExprTy = E->getType().getCanonicalType();
7193 
7194     // Reference types are ignored for mapping purposes.
7195     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7196       ExprTy = RefTy->getPointeeType().getCanonicalType();
7197 
7198     // Given that an array section is considered a built-in type, we need to
7199     // do the calculation based on the length of the section instead of relying
7200     // on CGF.getTypeSize(E->getType()).
7201     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7202       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7203                             OAE->getBase()->IgnoreParenImpCasts())
7204                             .getCanonicalType();
7205 
7206       // If there is no length associated with the expression and lower bound is
7207       // not specified too, that means we are using the whole length of the
7208       // base.
7209       if (!OAE->getLength() && OAE->getColonLoc().isValid() &&
7210           !OAE->getLowerBound())
7211         return CGF.getTypeSize(BaseTy);
7212 
7213       llvm::Value *ElemSize;
7214       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7215         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7216       } else {
7217         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7218         assert(ATy && "Expecting array type if not a pointer type.");
7219         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7220       }
7221 
7222       // If we don't have a length at this point, that is because we have an
7223       // array section with a single element.
7224       if (!OAE->getLength() && OAE->getColonLoc().isInvalid())
7225         return ElemSize;
7226 
7227       if (const Expr *LenExpr = OAE->getLength()) {
7228         llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7229         LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7230                                              CGF.getContext().getSizeType(),
7231                                              LenExpr->getExprLoc());
7232         return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7233       }
7234       assert(!OAE->getLength() && OAE->getColonLoc().isValid() &&
7235              OAE->getLowerBound() && "expected array_section[lb:].");
7236       // Size = sizetype - lb * elemtype;
7237       llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7238       llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7239       LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7240                                        CGF.getContext().getSizeType(),
7241                                        OAE->getLowerBound()->getExprLoc());
7242       LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7243       llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7244       llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7245       LengthVal = CGF.Builder.CreateSelect(
7246           Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7247       return LengthVal;
7248     }
7249     return CGF.getTypeSize(ExprTy);
7250   }
7251 
7252   /// Return the corresponding bits for a given map clause modifier. Add
7253   /// a flag marking the map as a pointer if requested. Add a flag marking the
7254   /// map as the first one of a series of maps that relate to the same map
7255   /// expression.
7256   OpenMPOffloadMappingFlags getMapTypeBits(
7257       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7258       bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
7259     OpenMPOffloadMappingFlags Bits =
7260         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7261     switch (MapType) {
7262     case OMPC_MAP_alloc:
7263     case OMPC_MAP_release:
7264       // alloc and release is the default behavior in the runtime library,  i.e.
7265       // if we don't pass any bits alloc/release that is what the runtime is
7266       // going to do. Therefore, we don't need to signal anything for these two
7267       // type modifiers.
7268       break;
7269     case OMPC_MAP_to:
7270       Bits |= OMP_MAP_TO;
7271       break;
7272     case OMPC_MAP_from:
7273       Bits |= OMP_MAP_FROM;
7274       break;
7275     case OMPC_MAP_tofrom:
7276       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7277       break;
7278     case OMPC_MAP_delete:
7279       Bits |= OMP_MAP_DELETE;
7280       break;
7281     case OMPC_MAP_unknown:
7282       llvm_unreachable("Unexpected map type!");
7283     }
7284     if (AddPtrFlag)
7285       Bits |= OMP_MAP_PTR_AND_OBJ;
7286     if (AddIsTargetParamFlag)
7287       Bits |= OMP_MAP_TARGET_PARAM;
7288     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7289         != MapModifiers.end())
7290       Bits |= OMP_MAP_ALWAYS;
7291     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7292         != MapModifiers.end())
7293       Bits |= OMP_MAP_CLOSE;
7294     return Bits;
7295   }
7296 
7297   /// Return true if the provided expression is a final array section. A
7298   /// final array section, is one whose length can't be proved to be one.
7299   bool isFinalArraySectionExpression(const Expr *E) const {
7300     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7301 
7302     // It is not an array section and therefore not a unity-size one.
7303     if (!OASE)
7304       return false;
7305 
7306     // An array section with no colon always refer to a single element.
7307     if (OASE->getColonLoc().isInvalid())
7308       return false;
7309 
7310     const Expr *Length = OASE->getLength();
7311 
7312     // If we don't have a length we have to check if the array has size 1
7313     // for this dimension. Also, we should always expect a length if the
7314     // base type is pointer.
7315     if (!Length) {
7316       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7317                              OASE->getBase()->IgnoreParenImpCasts())
7318                              .getCanonicalType();
7319       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7320         return ATy->getSize().getSExtValue() != 1;
7321       // If we don't have a constant dimension length, we have to consider
7322       // the current section as having any size, so it is not necessarily
7323       // unitary. If it happen to be unity size, that's user fault.
7324       return true;
7325     }
7326 
7327     // Check if the length evaluates to 1.
7328     Expr::EvalResult Result;
7329     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7330       return true; // Can have more that size 1.
7331 
7332     llvm::APSInt ConstLength = Result.Val.getInt();
7333     return ConstLength.getSExtValue() != 1;
7334   }
7335 
7336   /// Generate the base pointers, section pointers, sizes and map type
7337   /// bits for the provided map type, map modifier, and expression components.
7338   /// \a IsFirstComponent should be set to true if the provided set of
7339   /// components is the first associated with a capture.
7340   void generateInfoForComponentList(
7341       OpenMPMapClauseKind MapType,
7342       ArrayRef<OpenMPMapModifierKind> MapModifiers,
7343       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7344       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
7345       MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
7346       StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
7347       bool IsImplicit,
7348       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7349           OverlappedElements = llvm::None) const {
7350     // The following summarizes what has to be generated for each map and the
7351     // types below. The generated information is expressed in this order:
7352     // base pointer, section pointer, size, flags
7353     // (to add to the ones that come from the map type and modifier).
7354     //
7355     // double d;
7356     // int i[100];
7357     // float *p;
7358     //
7359     // struct S1 {
7360     //   int i;
7361     //   float f[50];
7362     // }
7363     // struct S2 {
7364     //   int i;
7365     //   float f[50];
7366     //   S1 s;
7367     //   double *p;
7368     //   struct S2 *ps;
7369     // }
7370     // S2 s;
7371     // S2 *ps;
7372     //
7373     // map(d)
7374     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7375     //
7376     // map(i)
7377     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7378     //
7379     // map(i[1:23])
7380     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7381     //
7382     // map(p)
7383     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7384     //
7385     // map(p[1:24])
7386     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7387     //
7388     // map(s)
7389     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7390     //
7391     // map(s.i)
7392     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7393     //
7394     // map(s.s.f)
7395     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7396     //
7397     // map(s.p)
7398     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7399     //
7400     // map(to: s.p[:22])
7401     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7402     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7403     // &(s.p), &(s.p[0]), 22*sizeof(double),
7404     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7405     // (*) alloc space for struct members, only this is a target parameter
7406     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7407     //      optimizes this entry out, same in the examples below)
7408     // (***) map the pointee (map: to)
7409     //
7410     // map(s.ps)
7411     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7412     //
7413     // map(from: s.ps->s.i)
7414     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7415     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7416     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7417     //
7418     // map(to: s.ps->ps)
7419     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7420     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7421     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7422     //
7423     // map(s.ps->ps->ps)
7424     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7425     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7426     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7427     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7428     //
7429     // map(to: s.ps->ps->s.f[:22])
7430     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7431     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7432     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7433     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7434     //
7435     // map(ps)
7436     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7437     //
7438     // map(ps->i)
7439     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7440     //
7441     // map(ps->s.f)
7442     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7443     //
7444     // map(from: ps->p)
7445     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7446     //
7447     // map(to: ps->p[:22])
7448     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7449     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7450     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7451     //
7452     // map(ps->ps)
7453     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7454     //
7455     // map(from: ps->ps->s.i)
7456     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7457     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7458     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7459     //
7460     // map(from: ps->ps->ps)
7461     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7462     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7463     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7464     //
7465     // map(ps->ps->ps->ps)
7466     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7467     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7468     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7469     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7470     //
7471     // map(to: ps->ps->ps->s.f[:22])
7472     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7473     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7474     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7475     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7476     //
7477     // map(to: s.f[:22]) map(from: s.p[:33])
7478     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7479     //     sizeof(double*) (**), TARGET_PARAM
7480     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7481     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7482     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7483     // (*) allocate contiguous space needed to fit all mapped members even if
7484     //     we allocate space for members not mapped (in this example,
7485     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7486     //     them as well because they fall between &s.f[0] and &s.p)
7487     //
7488     // map(from: s.f[:22]) map(to: ps->p[:33])
7489     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7490     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7491     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7492     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7493     // (*) the struct this entry pertains to is the 2nd element in the list of
7494     //     arguments, hence MEMBER_OF(2)
7495     //
7496     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7497     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7498     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7499     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7500     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7501     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7502     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7503     // (*) the struct this entry pertains to is the 4th element in the list
7504     //     of arguments, hence MEMBER_OF(4)
7505 
7506     // Track if the map information being generated is the first for a capture.
7507     bool IsCaptureFirstInfo = IsFirstComponentList;
7508     // When the variable is on a declare target link or in a to clause with
7509     // unified memory, a reference is needed to hold the host/device address
7510     // of the variable.
7511     bool RequiresReference = false;
7512 
7513     // Scan the components from the base to the complete expression.
7514     auto CI = Components.rbegin();
7515     auto CE = Components.rend();
7516     auto I = CI;
7517 
7518     // Track if the map information being generated is the first for a list of
7519     // components.
7520     bool IsExpressionFirstInfo = true;
7521     Address BP = Address::invalid();
7522     const Expr *AssocExpr = I->getAssociatedExpression();
7523     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7524     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7525 
7526     if (isa<MemberExpr>(AssocExpr)) {
7527       // The base is the 'this' pointer. The content of the pointer is going
7528       // to be the base of the field being mapped.
7529       BP = CGF.LoadCXXThisAddress();
7530     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7531                (OASE &&
7532                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7533       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7534     } else {
7535       // The base is the reference to the variable.
7536       // BP = &Var.
7537       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7538       if (const auto *VD =
7539               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7540         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7541                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7542           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7543               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7544                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7545             RequiresReference = true;
7546             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7547           }
7548         }
7549       }
7550 
7551       // If the variable is a pointer and is being dereferenced (i.e. is not
7552       // the last component), the base has to be the pointer itself, not its
7553       // reference. References are ignored for mapping purposes.
7554       QualType Ty =
7555           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7556       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7557         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7558 
7559         // We do not need to generate individual map information for the
7560         // pointer, it can be associated with the combined storage.
7561         ++I;
7562       }
7563     }
7564 
7565     // Track whether a component of the list should be marked as MEMBER_OF some
7566     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7567     // in a component list should be marked as MEMBER_OF, all subsequent entries
7568     // do not belong to the base struct. E.g.
7569     // struct S2 s;
7570     // s.ps->ps->ps->f[:]
7571     //   (1) (2) (3) (4)
7572     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7573     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7574     // is the pointee of ps(2) which is not member of struct s, so it should not
7575     // be marked as such (it is still PTR_AND_OBJ).
7576     // The variable is initialized to false so that PTR_AND_OBJ entries which
7577     // are not struct members are not considered (e.g. array of pointers to
7578     // data).
7579     bool ShouldBeMemberOf = false;
7580 
7581     // Variable keeping track of whether or not we have encountered a component
7582     // in the component list which is a member expression. Useful when we have a
7583     // pointer or a final array section, in which case it is the previous
7584     // component in the list which tells us whether we have a member expression.
7585     // E.g. X.f[:]
7586     // While processing the final array section "[:]" it is "f" which tells us
7587     // whether we are dealing with a member of a declared struct.
7588     const MemberExpr *EncounteredME = nullptr;
7589 
7590     for (; I != CE; ++I) {
7591       // If the current component is member of a struct (parent struct) mark it.
7592       if (!EncounteredME) {
7593         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7594         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7595         // as MEMBER_OF the parent struct.
7596         if (EncounteredME)
7597           ShouldBeMemberOf = true;
7598       }
7599 
7600       auto Next = std::next(I);
7601 
7602       // We need to generate the addresses and sizes if this is the last
7603       // component, if the component is a pointer or if it is an array section
7604       // whose length can't be proved to be one. If this is a pointer, it
7605       // becomes the base address for the following components.
7606 
7607       // A final array section, is one whose length can't be proved to be one.
7608       bool IsFinalArraySection =
7609           isFinalArraySectionExpression(I->getAssociatedExpression());
7610 
7611       // Get information on whether the element is a pointer. Have to do a
7612       // special treatment for array sections given that they are built-in
7613       // types.
7614       const auto *OASE =
7615           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7616       bool IsPointer =
7617           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7618                        .getCanonicalType()
7619                        ->isAnyPointerType()) ||
7620           I->getAssociatedExpression()->getType()->isAnyPointerType();
7621 
7622       if (Next == CE || IsPointer || IsFinalArraySection) {
7623         // If this is not the last component, we expect the pointer to be
7624         // associated with an array expression or member expression.
7625         assert((Next == CE ||
7626                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7627                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7628                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
7629                "Unexpected expression");
7630 
7631         Address LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7632                          .getAddress(CGF);
7633 
7634         // If this component is a pointer inside the base struct then we don't
7635         // need to create any entry for it - it will be combined with the object
7636         // it is pointing to into a single PTR_AND_OBJ entry.
7637         bool IsMemberPointer =
7638             IsPointer && EncounteredME &&
7639             (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
7640              EncounteredME);
7641         if (!OverlappedElements.empty()) {
7642           // Handle base element with the info for overlapped elements.
7643           assert(!PartialStruct.Base.isValid() && "The base element is set.");
7644           assert(Next == CE &&
7645                  "Expected last element for the overlapped elements.");
7646           assert(!IsPointer &&
7647                  "Unexpected base element with the pointer type.");
7648           // Mark the whole struct as the struct that requires allocation on the
7649           // device.
7650           PartialStruct.LowestElem = {0, LB};
7651           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7652               I->getAssociatedExpression()->getType());
7653           Address HB = CGF.Builder.CreateConstGEP(
7654               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
7655                                                               CGF.VoidPtrTy),
7656               TypeSize.getQuantity() - 1);
7657           PartialStruct.HighestElem = {
7658               std::numeric_limits<decltype(
7659                   PartialStruct.HighestElem.first)>::max(),
7660               HB};
7661           PartialStruct.Base = BP;
7662           // Emit data for non-overlapped data.
7663           OpenMPOffloadMappingFlags Flags =
7664               OMP_MAP_MEMBER_OF |
7665               getMapTypeBits(MapType, MapModifiers, IsImplicit,
7666                              /*AddPtrFlag=*/false,
7667                              /*AddIsTargetParamFlag=*/false);
7668           LB = BP;
7669           llvm::Value *Size = nullptr;
7670           // Do bitcopy of all non-overlapped structure elements.
7671           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7672                    Component : OverlappedElements) {
7673             Address ComponentLB = Address::invalid();
7674             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7675                  Component) {
7676               if (MC.getAssociatedDeclaration()) {
7677                 ComponentLB =
7678                     CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7679                         .getAddress(CGF);
7680                 Size = CGF.Builder.CreatePtrDiff(
7681                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
7682                     CGF.EmitCastToVoidPtr(LB.getPointer()));
7683                 break;
7684               }
7685             }
7686             BasePointers.push_back(BP.getPointer());
7687             Pointers.push_back(LB.getPointer());
7688             Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty,
7689                                                       /*isSigned=*/true));
7690             Types.push_back(Flags);
7691             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7692           }
7693           BasePointers.push_back(BP.getPointer());
7694           Pointers.push_back(LB.getPointer());
7695           Size = CGF.Builder.CreatePtrDiff(
7696               CGF.EmitCastToVoidPtr(
7697                   CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
7698               CGF.EmitCastToVoidPtr(LB.getPointer()));
7699           Sizes.push_back(
7700               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7701           Types.push_back(Flags);
7702           break;
7703         }
7704         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7705         if (!IsMemberPointer) {
7706           BasePointers.push_back(BP.getPointer());
7707           Pointers.push_back(LB.getPointer());
7708           Sizes.push_back(
7709               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7710 
7711           // We need to add a pointer flag for each map that comes from the
7712           // same expression except for the first one. We also need to signal
7713           // this map is the first one that relates with the current capture
7714           // (there is a set of entries for each capture).
7715           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7716               MapType, MapModifiers, IsImplicit,
7717               !IsExpressionFirstInfo || RequiresReference,
7718               IsCaptureFirstInfo && !RequiresReference);
7719 
7720           if (!IsExpressionFirstInfo) {
7721             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7722             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7723             if (IsPointer)
7724               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
7725                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
7726 
7727             if (ShouldBeMemberOf) {
7728               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7729               // should be later updated with the correct value of MEMBER_OF.
7730               Flags |= OMP_MAP_MEMBER_OF;
7731               // From now on, all subsequent PTR_AND_OBJ entries should not be
7732               // marked as MEMBER_OF.
7733               ShouldBeMemberOf = false;
7734             }
7735           }
7736 
7737           Types.push_back(Flags);
7738         }
7739 
7740         // If we have encountered a member expression so far, keep track of the
7741         // mapped member. If the parent is "*this", then the value declaration
7742         // is nullptr.
7743         if (EncounteredME) {
7744           const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl());
7745           unsigned FieldIndex = FD->getFieldIndex();
7746 
7747           // Update info about the lowest and highest elements for this struct
7748           if (!PartialStruct.Base.isValid()) {
7749             PartialStruct.LowestElem = {FieldIndex, LB};
7750             PartialStruct.HighestElem = {FieldIndex, LB};
7751             PartialStruct.Base = BP;
7752           } else if (FieldIndex < PartialStruct.LowestElem.first) {
7753             PartialStruct.LowestElem = {FieldIndex, LB};
7754           } else if (FieldIndex > PartialStruct.HighestElem.first) {
7755             PartialStruct.HighestElem = {FieldIndex, LB};
7756           }
7757         }
7758 
7759         // If we have a final array section, we are done with this expression.
7760         if (IsFinalArraySection)
7761           break;
7762 
7763         // The pointer becomes the base for the next element.
7764         if (Next != CE)
7765           BP = LB;
7766 
7767         IsExpressionFirstInfo = false;
7768         IsCaptureFirstInfo = false;
7769       }
7770     }
7771   }
7772 
7773   /// Return the adjusted map modifiers if the declaration a capture refers to
7774   /// appears in a first-private clause. This is expected to be used only with
7775   /// directives that start with 'target'.
7776   MappableExprsHandler::OpenMPOffloadMappingFlags
7777   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7778     assert(Cap.capturesVariable() && "Expected capture by reference only!");
7779 
7780     // A first private variable captured by reference will use only the
7781     // 'private ptr' and 'map to' flag. Return the right flags if the captured
7782     // declaration is known as first-private in this handler.
7783     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7784       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
7785           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
7786         return MappableExprsHandler::OMP_MAP_ALWAYS |
7787                MappableExprsHandler::OMP_MAP_TO;
7788       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7789         return MappableExprsHandler::OMP_MAP_TO |
7790                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
7791       return MappableExprsHandler::OMP_MAP_PRIVATE |
7792              MappableExprsHandler::OMP_MAP_TO;
7793     }
7794     return MappableExprsHandler::OMP_MAP_TO |
7795            MappableExprsHandler::OMP_MAP_FROM;
7796   }
7797 
7798   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
7799     // Rotate by getFlagMemberOffset() bits.
7800     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
7801                                                   << getFlagMemberOffset());
7802   }
7803 
7804   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
7805                                      OpenMPOffloadMappingFlags MemberOfFlag) {
7806     // If the entry is PTR_AND_OBJ but has not been marked with the special
7807     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
7808     // marked as MEMBER_OF.
7809     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
7810         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
7811       return;
7812 
7813     // Reset the placeholder value to prepare the flag for the assignment of the
7814     // proper MEMBER_OF value.
7815     Flags &= ~OMP_MAP_MEMBER_OF;
7816     Flags |= MemberOfFlag;
7817   }
7818 
7819   void getPlainLayout(const CXXRecordDecl *RD,
7820                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7821                       bool AsBase) const {
7822     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7823 
7824     llvm::StructType *St =
7825         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7826 
7827     unsigned NumElements = St->getNumElements();
7828     llvm::SmallVector<
7829         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7830         RecordLayout(NumElements);
7831 
7832     // Fill bases.
7833     for (const auto &I : RD->bases()) {
7834       if (I.isVirtual())
7835         continue;
7836       const auto *Base = I.getType()->getAsCXXRecordDecl();
7837       // Ignore empty bases.
7838       if (Base->isEmpty() || CGF.getContext()
7839                                  .getASTRecordLayout(Base)
7840                                  .getNonVirtualSize()
7841                                  .isZero())
7842         continue;
7843 
7844       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7845       RecordLayout[FieldIndex] = Base;
7846     }
7847     // Fill in virtual bases.
7848     for (const auto &I : RD->vbases()) {
7849       const auto *Base = I.getType()->getAsCXXRecordDecl();
7850       // Ignore empty bases.
7851       if (Base->isEmpty())
7852         continue;
7853       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7854       if (RecordLayout[FieldIndex])
7855         continue;
7856       RecordLayout[FieldIndex] = Base;
7857     }
7858     // Fill in all the fields.
7859     assert(!RD->isUnion() && "Unexpected union.");
7860     for (const auto *Field : RD->fields()) {
7861       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
7862       // will fill in later.)
7863       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
7864         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
7865         RecordLayout[FieldIndex] = Field;
7866       }
7867     }
7868     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
7869              &Data : RecordLayout) {
7870       if (Data.isNull())
7871         continue;
7872       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
7873         getPlainLayout(Base, Layout, /*AsBase=*/true);
7874       else
7875         Layout.push_back(Data.get<const FieldDecl *>());
7876     }
7877   }
7878 
7879 public:
7880   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
7881       : CurDir(&Dir), CGF(CGF) {
7882     // Extract firstprivate clause information.
7883     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
7884       for (const auto *D : C->varlists())
7885         FirstPrivateDecls.try_emplace(
7886             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
7887     // Extract device pointer clause information.
7888     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
7889       for (auto L : C->component_lists())
7890         DevPointersMap[L.first].push_back(L.second);
7891   }
7892 
  /// Constructor for the declare mapper directive.
  ///
  /// Only stores \p Dir and \p CGF; no clause information is extracted here.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
7896 
7897   /// Generate code for the combined entry if we have a partially mapped struct
7898   /// and take care of the mapping flags of the arguments corresponding to
7899   /// individual struct members.
7900   void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
7901                          MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7902                          MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
7903                          const StructRangeInfoTy &PartialStruct) const {
7904     // Base is the base of the struct
7905     BasePointers.push_back(PartialStruct.Base.getPointer());
7906     // Pointer is the address of the lowest element
7907     llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
7908     Pointers.push_back(LB);
7909     // Size is (addr of {highest+1} element) - (addr of lowest element)
7910     llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
7911     llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
7912     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
7913     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
7914     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
7915     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
7916                                                   /*isSigned=*/false);
7917     Sizes.push_back(Size);
7918     // Map type is always TARGET_PARAM
7919     Types.push_back(OMP_MAP_TARGET_PARAM);
7920     // Remove TARGET_PARAM flag from the first element
7921     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
7922 
7923     // All other current entries will be MEMBER_OF the combined entry
7924     // (except for PTR_AND_OBJ entries which do not have a placeholder value
7925     // 0xFFFF in the MEMBER_OF field).
7926     OpenMPOffloadMappingFlags MemberOfFlag =
7927         getMemberOfFlag(BasePointers.size() - 1);
7928     for (auto &M : CurTypes)
7929       setCorrectMemberOfFlag(M, MemberOfFlag);
7930   }
7931 
7932   /// Generate all the base pointers, section pointers, sizes and map
7933   /// types for the extracted mappable expressions. Also, for each item that
7934   /// relates with a device pointer, a pair of the relevant declaration and
7935   /// index where it occurs is appended to the device pointers info array.
  void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
                       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                       MapFlagsArrayTy &Types) const {
    // Outline of this function:
    //  1. Collect the component lists of all map/to/from clauses of the
    //     current executable directive, grouped by canonical declaration.
    //  2. Process the use_device_ptr clauses: either flag an already collected
    //     list, defer a struct-member entry, or emit a zero-size entry
    //     directly into the output arrays.
    //  3. For each declaration, generate its entries into temporary arrays,
    //     append deferred entries, emit a combined entry if a partial struct
    //     was recorded, and append the temporaries to the output arrays.

    // We have to process the component lists that relate to the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses. The key is the canonical declaration, or null when no
    // declaration is given.
    auto &&InfoGen = [&Info](
        const ValueDecl *D,
        OMPClauseMappableExprCommon::MappableExprComponentListRef L,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit) {
      const ValueDecl *VD =
          D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
      Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
                            IsImplicit);
    };

    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    // 'map' clauses carry their own map type and modifiers.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }
    // 'to' clauses are recorded as map type 'to' without modifiers.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }
    // 'from' clauses are recorded as map type 'from' without modifiers.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before. If there
    // is no map information and the pointer is a struct member, then we defer
    // the emission of that entry until the whole struct has been processed.
    llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;

    for (const auto *C :
         CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
      for (const auto L : C->component_lists()) {
        assert(!L.second.empty() && "Not expecting empty list of components!");
        const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = L.second.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto CI = std::find_if(
              It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
                return MI.Components.back().getAssociatedDeclaration() == VD;
              });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          if (CI != It->second.end()) {
            CI->ReturnDevicePointer = true;
            continue;
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
                  /*ReturnDevicePointer=*/false, C->isImplicit());
          // Both the Info entry above and this deferred entry are keyed by
          // null, so they are picked up together in the loop over Info below.
          DeferredInfo[nullptr].emplace_back(IE, VD);
        } else {
          // Not a struct member: load the pointer value and emit the zero-size
          // entry straight into the output arrays. It therefore precedes all
          // entries generated from Info below.
          llvm::Value *Ptr =
              CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
          BasePointers.emplace_back(Ptr, VD);
          Pointers.push_back(Ptr);
          Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
          Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
        }
      }
    }

    for (const auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = true;

      // Temporary versions of arrays, holding only the entries generated for
      // the declaration M.first.
      MapBaseValuesArrayTy CurBasePointers;
      MapValuesArrayTy CurPointers;
      MapValuesArrayTy CurSizes;
      MapFlagsArrayTy CurTypes;
      StructRangeInfoTy PartialStruct;

      for (const MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");

        // Remember the current base pointer index, i.e. the index of the
        // first entry this component list is going to produce.
        unsigned CurrentBasePointersIdx = CurBasePointers.size();
        generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
                                     CurBasePointers, CurPointers, CurSizes,
                                     CurTypes, PartialStruct,
                                     IsFirstComponentList, L.IsImplicit);

        // If this entry relates to a device pointer, set the relevant
        // declaration and add the 'return pointer' flag on the first entry
        // generated for this component list.
        if (L.ReturnDevicePointer) {
          assert(CurBasePointers.size() > CurrentBasePointersIdx &&
                 "Unexpected number of mapped base pointers.");

          const ValueDecl *RelevantVD =
              L.Components.back().getAssociatedDeclaration();
          assert(RelevantVD &&
                 "No relevant declaration related with device pointer??");

          CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
          CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
        }
        IsFirstComponentList = false;
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr.
      auto CI = DeferredInfo.find(M.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          // The base pointer is the address of the member; the pointer is the
          // value loaded from that member. Note that the lvalue for L.IE is
          // emitted twice here.
          llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
          llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
              this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
          CurBasePointers.emplace_back(BasePtr, L.VD);
          CurPointers.push_back(Ptr);
          CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
          // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
          // value MEMBER_OF=FFFF so that the entry is later updated with the
          // correct value of MEMBER_OF.
          CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                             OMP_MAP_MEMBER_OF);
        }
      }

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry. It is passed
      // the output arrays (not the temporaries), so it is emitted before the
      // entries appended below; CurTypes is passed as well.
      if (PartialStruct.Base.isValid())
        emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
                          PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      Types.append(CurTypes.begin(), CurTypes.end());
    }
  }
8110 
8111   /// Generate all the base pointers, section pointers, sizes and map types for
8112   /// the extracted map clauses of user-defined mapper.
8113   void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers,
8114                                 MapValuesArrayTy &Pointers,
8115                                 MapValuesArrayTy &Sizes,
8116                                 MapFlagsArrayTy &Types) const {
8117     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8118            "Expect a declare mapper directive");
8119     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8120     // We have to process the component lists that relate with the same
8121     // declaration in a single chunk so that we can generate the map flags
8122     // correctly. Therefore, we organize all lists in a map.
8123     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
8124 
8125     // Helper function to fill the information map for the different supported
8126     // clauses.
8127     auto &&InfoGen = [&Info](
8128         const ValueDecl *D,
8129         OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8130         OpenMPMapClauseKind MapType,
8131         ArrayRef<OpenMPMapModifierKind> MapModifiers,
8132         bool ReturnDevicePointer, bool IsImplicit) {
8133       const ValueDecl *VD =
8134           D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
8135       Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
8136                             IsImplicit);
8137     };
8138 
8139     for (const auto *C : CurMapperDir->clauselists()) {
8140       const auto *MC = cast<OMPMapClause>(C);
8141       for (const auto L : MC->component_lists()) {
8142         InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(),
8143                 /*ReturnDevicePointer=*/false, MC->isImplicit());
8144       }
8145     }
8146 
8147     for (const auto &M : Info) {
8148       // We need to know when we generate information for the first component
8149       // associated with a capture, because the mapping flags depend on it.
8150       bool IsFirstComponentList = true;
8151 
8152       // Temporary versions of arrays
8153       MapBaseValuesArrayTy CurBasePointers;
8154       MapValuesArrayTy CurPointers;
8155       MapValuesArrayTy CurSizes;
8156       MapFlagsArrayTy CurTypes;
8157       StructRangeInfoTy PartialStruct;
8158 
8159       for (const MapInfo &L : M.second) {
8160         assert(!L.Components.empty() &&
8161                "Not expecting declaration with no component lists.");
8162         generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
8163                                      CurBasePointers, CurPointers, CurSizes,
8164                                      CurTypes, PartialStruct,
8165                                      IsFirstComponentList, L.IsImplicit);
8166         IsFirstComponentList = false;
8167       }
8168 
8169       // If there is an entry in PartialStruct it means we have a struct with
8170       // individual members mapped. Emit an extra combined entry.
8171       if (PartialStruct.Base.isValid())
8172         emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8173                           PartialStruct);
8174 
8175       // We need to append the results of this capture to what we already have.
8176       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8177       Pointers.append(CurPointers.begin(), CurPointers.end());
8178       Sizes.append(CurSizes.begin(), CurSizes.end());
8179       Types.append(CurTypes.begin(), CurTypes.end());
8180     }
8181   }
8182 
8183   /// Emit capture info for lambdas for variables captured by reference.
8184   void generateInfoForLambdaCaptures(
8185       const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
8186       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
8187       MapFlagsArrayTy &Types,
8188       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8189     const auto *RD = VD->getType()
8190                          .getCanonicalType()
8191                          .getNonReferenceType()
8192                          ->getAsCXXRecordDecl();
8193     if (!RD || !RD->isLambda())
8194       return;
8195     Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
8196     LValue VDLVal = CGF.MakeAddrLValue(
8197         VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
8198     llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
8199     FieldDecl *ThisCapture = nullptr;
8200     RD->getCaptureFields(Captures, ThisCapture);
8201     if (ThisCapture) {
8202       LValue ThisLVal =
8203           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8204       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8205       LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8206                                  VDLVal.getPointer(CGF));
8207       BasePointers.push_back(ThisLVal.getPointer(CGF));
8208       Pointers.push_back(ThisLValVal.getPointer(CGF));
8209       Sizes.push_back(
8210           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8211                                     CGF.Int64Ty, /*isSigned=*/true));
8212       Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8213                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8214     }
8215     for (const LambdaCapture &LC : RD->captures()) {
8216       if (!LC.capturesVariable())
8217         continue;
8218       const VarDecl *VD = LC.getCapturedVar();
8219       if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8220         continue;
8221       auto It = Captures.find(VD);
8222       assert(It != Captures.end() && "Found lambda capture without field.");
8223       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8224       if (LC.getCaptureKind() == LCK_ByRef) {
8225         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8226         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8227                                    VDLVal.getPointer(CGF));
8228         BasePointers.push_back(VarLVal.getPointer(CGF));
8229         Pointers.push_back(VarLValVal.getPointer(CGF));
8230         Sizes.push_back(CGF.Builder.CreateIntCast(
8231             CGF.getTypeSize(
8232                 VD->getType().getCanonicalType().getNonReferenceType()),
8233             CGF.Int64Ty, /*isSigned=*/true));
8234       } else {
8235         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8236         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8237                                    VDLVal.getPointer(CGF));
8238         BasePointers.push_back(VarLVal.getPointer(CGF));
8239         Pointers.push_back(VarRVal.getScalarVal());
8240         Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8241       }
8242       Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8243                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8244     }
8245   }
8246 
  /// Set the correct MEMBER_OF indices for lambda captures.
8248   void adjustMemberOfForLambdaCaptures(
8249       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8250       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8251       MapFlagsArrayTy &Types) const {
8252     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8253       // Set correct member_of idx for all implicit lambda captures.
8254       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8255                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8256         continue;
8257       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8258       assert(BasePtr && "Unable to find base lambda address.");
8259       int TgtIdx = -1;
8260       for (unsigned J = I; J > 0; --J) {
8261         unsigned Idx = J - 1;
8262         if (Pointers[Idx] != BasePtr)
8263           continue;
8264         TgtIdx = Idx;
8265         break;
8266       }
8267       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8268       // All other current entries will be MEMBER_OF the combined entry
8269       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8270       // 0xFFFF in the MEMBER_OF field).
8271       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8272       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8273     }
8274   }
8275 
8276   /// Generate the base pointers, section pointers, sizes and map types
8277   /// associated to a given capture.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg,
                              MapBaseValuesArrayTy &BasePointers,
                              MapValuesArrayTy &Pointers,
                              MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // A capture of 'this' is represented by a null declaration; any other
    // capture is identified by the canonical declaration of its variable.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      BasePointers.emplace_back(Arg, VD);
      Pointers.push_back(Arg);
      Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
      return;
    }

    // One MapData per component list of VD: (components, map type, map
    // modifiers, is-implicit).
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    // Gather every component list that the map clauses of the current
    // directive associate with VD.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      for (const auto L : C->decl_component_lists(VD)) {
        assert(L.first == VD &&
               "We got information for the wrong declaration??");
        assert(!L.second.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(L.second, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit());
      }
    }

    // Find overlapping elements (including the offset from the base element).
    // The map is keyed by the address of the base element inside
    // DeclComponentLists, which is not modified from here on.
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      // Only Components is read in this loop; MapType, MapModifiers and
      // IsImplicit merely serve as targets for std::tie.
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ++Count;
      // Compare L against every list that follows it, so that each unordered
      // pair is examined once.
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
        // Walk both lists in reverse order for as long as the components
        // agree in expression class and associated declaration.
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head of
        // the components list.
        if (CI == CE || SI == SE) {
          assert((CI != CE || SI != SE) &&
                 "Unexpected full match of the mapping components.");
          // The list that was exhausted is the base; the other (longer) list
          // is recorded as an element overlapped with it.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item. Layout holds the fields of
    // VD's record type and is used to order fields that have different
    // parents.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // NOTE(review): VD is null for a capture of 'this', and RD below is used
      // without a null check; this code presumably relies on overlapped data
      // only existing for variables of record type - confirm.
      if (const auto *CRD =
              VD->getType().getCanonicalType()->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            // Skip the common part of both lists (same walk as in the overlap
            // detection above).
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // The lists diverge at two different fields. Fields of the same
            // parent are ordered by field index; otherwise the field that
            // appears first in Layout is the smaller one.
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            // NOTE(review): It is dereferenced without comparing it against
            // Layout.end(); this assumes at least one of FD1/FD2 is present in
            // Layout - confirm.
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Go through all of the elements that have overlapped elements. Each of
    // them is generated as the first component list associated with the
    // capture, because the mapping flags depend on it.
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      bool IsFirstComponentList = true;
      generateInfoForComponentList(MapType, MapModifiers, Components,
                                   BasePointers, Pointers, Sizes, Types,
                                   PartialStruct, IsFirstComponentList,
                                   IsImplicit, OverlappedComponents);
    }
    // Go through other elements without overlapped elements. A list counts as
    // the first component list only if nothing was generated above and it is
    // the first element of DeclComponentLists; the flag is cleared after every
    // element, including the ones skipped here because they are bases in
    // OverlappedData.
    bool IsFirstComponentList = OverlappedData.empty();
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, Components,
                                     BasePointers, Pointers, Sizes, Types,
                                     PartialStruct, IsFirstComponentList,
                                     IsImplicit);
      IsFirstComponentList = false;
    }
  }
8453 
8454   /// Generate the base pointers, section pointers, sizes and map types
8455   /// associated with the declare target link variables.
8456   void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
8457                                         MapValuesArrayTy &Pointers,
8458                                         MapValuesArrayTy &Sizes,
8459                                         MapFlagsArrayTy &Types) const {
8460     assert(CurDir.is<const OMPExecutableDirective *>() &&
8461            "Expect a executable directive");
8462     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8463     // Map other list items in the map clause which are not captured variables
8464     // but "declare target link" global variables.
8465     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8466       for (const auto L : C->component_lists()) {
8467         if (!L.first)
8468           continue;
8469         const auto *VD = dyn_cast<VarDecl>(L.first);
8470         if (!VD)
8471           continue;
8472         llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
8473             OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
8474         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8475             !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
8476           continue;
8477         StructRangeInfoTy PartialStruct;
8478         generateInfoForComponentList(
8479             C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
8480             Pointers, Sizes, Types, PartialStruct,
8481             /*IsFirstComponentList=*/true, C->isImplicit());
8482         assert(!PartialStruct.Base.isValid() &&
8483                "No partial structs for declare target link expected.");
8484       }
8485     }
8486   }
8487 
8488   /// Generate the default map information for a given capture \a CI,
8489   /// record field declaration \a RI and captured value \a CV.
8490   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
8491                               const FieldDecl &RI, llvm::Value *CV,
8492                               MapBaseValuesArrayTy &CurBasePointers,
8493                               MapValuesArrayTy &CurPointers,
8494                               MapValuesArrayTy &CurSizes,
8495                               MapFlagsArrayTy &CurMapTypes) const {
8496     bool IsImplicit = true;
8497     // Do the default mapping.
8498     if (CI.capturesThis()) {
8499       CurBasePointers.push_back(CV);
8500       CurPointers.push_back(CV);
8501       const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
8502       CurSizes.push_back(
8503           CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
8504                                     CGF.Int64Ty, /*isSigned=*/true));
8505       // Default map type.
8506       CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
8507     } else if (CI.capturesVariableByCopy()) {
8508       CurBasePointers.push_back(CV);
8509       CurPointers.push_back(CV);
8510       if (!RI.getType()->isAnyPointerType()) {
8511         // We have to signal to the runtime captures passed by value that are
8512         // not pointers.
8513         CurMapTypes.push_back(OMP_MAP_LITERAL);
8514         CurSizes.push_back(CGF.Builder.CreateIntCast(
8515             CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
8516       } else {
8517         // Pointers are implicitly mapped with a zero size and no flags
8518         // (other than first map that is added for all implicit maps).
8519         CurMapTypes.push_back(OMP_MAP_NONE);
8520         CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8521       }
8522       const VarDecl *VD = CI.getCapturedVar();
8523       auto I = FirstPrivateDecls.find(VD);
8524       if (I != FirstPrivateDecls.end())
8525         IsImplicit = I->getSecond();
8526     } else {
8527       assert(CI.capturesVariable() && "Expected captured reference.");
8528       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
8529       QualType ElementType = PtrTy->getPointeeType();
8530       CurSizes.push_back(CGF.Builder.CreateIntCast(
8531           CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
8532       // The default map type for a scalar/complex type is 'to' because by
8533       // default the value doesn't have to be retrieved. For an aggregate
8534       // type, the default is 'tofrom'.
8535       CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
8536       const VarDecl *VD = CI.getCapturedVar();
8537       auto I = FirstPrivateDecls.find(VD);
8538       if (I != FirstPrivateDecls.end() &&
8539           VD->getType().isConstant(CGF.getContext())) {
8540         llvm::Constant *Addr =
8541             CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
8542         // Copy the value of the original variable to the new global copy.
8543         CGF.Builder.CreateMemCpy(
8544             CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
8545             Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
8546             CurSizes.back(), /*IsVolatile=*/false);
8547         // Use new global variable as the base pointers.
8548         CurBasePointers.push_back(Addr);
8549         CurPointers.push_back(Addr);
8550       } else {
8551         CurBasePointers.push_back(CV);
8552         if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
8553           Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
8554               CV, ElementType, CGF.getContext().getDeclAlign(VD),
8555               AlignmentSource::Decl));
8556           CurPointers.push_back(PtrAddr.getPointer());
8557         } else {
8558           CurPointers.push_back(CV);
8559         }
8560       }
8561       if (I != FirstPrivateDecls.end())
8562         IsImplicit = I->getSecond();
8563     }
8564     // Every default map produces a single argument which is a target parameter.
8565     CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;
8566 
8567     // Add flag stating this is an implicit map.
8568     if (IsImplicit)
8569       CurMapTypes.back() |= OMP_MAP_IMPLICIT;
8570   }
8571 };
8572 } // anonymous namespace
8573 
8574 /// Emit the arrays used to pass the captures and map information to the
8575 /// offloading runtime library. If there is no map or capture information,
8576 /// return nullptr by reference.
8577 static void
8578 emitOffloadingArrays(CodeGenFunction &CGF,
8579                      MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
8580                      MappableExprsHandler::MapValuesArrayTy &Pointers,
8581                      MappableExprsHandler::MapValuesArrayTy &Sizes,
8582                      MappableExprsHandler::MapFlagsArrayTy &MapTypes,
8583                      CGOpenMPRuntime::TargetDataInfo &Info) {
8584   CodeGenModule &CGM = CGF.CGM;
8585   ASTContext &Ctx = CGF.getContext();
8586 
8587   // Reset the array information.
8588   Info.clearArrayInfo();
8589   Info.NumberOfPtrs = BasePointers.size();
8590 
8591   if (Info.NumberOfPtrs) {
8592     // Detect if we have any capture size requiring runtime evaluation of the
8593     // size so that a constant array could be eventually used.
8594     bool hasRuntimeEvaluationCaptureSize = false;
8595     for (llvm::Value *S : Sizes)
8596       if (!isa<llvm::Constant>(S)) {
8597         hasRuntimeEvaluationCaptureSize = true;
8598         break;
8599       }
8600 
8601     llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
8602     QualType PointerArrayType = Ctx.getConstantArrayType(
8603         Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
8604         /*IndexTypeQuals=*/0);
8605 
8606     Info.BasePointersArray =
8607         CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
8608     Info.PointersArray =
8609         CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
8610 
8611     // If we don't have any VLA types or other types that require runtime
8612     // evaluation, we can use a constant array for the map sizes, otherwise we
8613     // need to fill up the arrays as we do for the pointers.
8614     QualType Int64Ty =
8615         Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
8616     if (hasRuntimeEvaluationCaptureSize) {
8617       QualType SizeArrayType = Ctx.getConstantArrayType(
8618           Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
8619           /*IndexTypeQuals=*/0);
8620       Info.SizesArray =
8621           CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
8622     } else {
8623       // We expect all the sizes to be constant, so we collect them to create
8624       // a constant array.
8625       SmallVector<llvm::Constant *, 16> ConstSizes;
8626       for (llvm::Value *S : Sizes)
8627         ConstSizes.push_back(cast<llvm::Constant>(S));
8628 
8629       auto *SizesArrayInit = llvm::ConstantArray::get(
8630           llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
8631       std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
8632       auto *SizesArrayGbl = new llvm::GlobalVariable(
8633           CGM.getModule(), SizesArrayInit->getType(),
8634           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8635           SizesArrayInit, Name);
8636       SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8637       Info.SizesArray = SizesArrayGbl;
8638     }
8639 
8640     // The map types are always constant so we don't need to generate code to
8641     // fill arrays. Instead, we create an array constant.
8642     SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
8643     llvm::copy(MapTypes, Mapping.begin());
8644     llvm::Constant *MapTypesArrayInit =
8645         llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
8646     std::string MaptypesName =
8647         CGM.getOpenMPRuntime().getName({"offload_maptypes"});
8648     auto *MapTypesArrayGbl = new llvm::GlobalVariable(
8649         CGM.getModule(), MapTypesArrayInit->getType(),
8650         /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8651         MapTypesArrayInit, MaptypesName);
8652     MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8653     Info.MapTypesArray = MapTypesArrayGbl;
8654 
8655     for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
8656       llvm::Value *BPVal = *BasePointers[I];
8657       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
8658           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8659           Info.BasePointersArray, 0, I);
8660       BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8661           BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
8662       Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8663       CGF.Builder.CreateStore(BPVal, BPAddr);
8664 
8665       if (Info.requiresDevicePointerInfo())
8666         if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
8667           Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
8668 
8669       llvm::Value *PVal = Pointers[I];
8670       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
8671           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8672           Info.PointersArray, 0, I);
8673       P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8674           P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
8675       Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8676       CGF.Builder.CreateStore(PVal, PAddr);
8677 
8678       if (hasRuntimeEvaluationCaptureSize) {
8679         llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
8680             llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8681             Info.SizesArray,
8682             /*Idx0=*/0,
8683             /*Idx1=*/I);
8684         Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
8685         CGF.Builder.CreateStore(
8686             CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true),
8687             SAddr);
8688       }
8689     }
8690   }
8691 }
8692 
8693 /// Emit the arguments to be passed to the runtime library based on the
8694 /// arrays of pointers, sizes and map types.
8695 static void emitOffloadingArraysArgument(
8696     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
8697     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
8698     llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
8699   CodeGenModule &CGM = CGF.CGM;
8700   if (Info.NumberOfPtrs) {
8701     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8702         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8703         Info.BasePointersArray,
8704         /*Idx0=*/0, /*Idx1=*/0);
8705     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8706         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8707         Info.PointersArray,
8708         /*Idx0=*/0,
8709         /*Idx1=*/0);
8710     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8711         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
8712         /*Idx0=*/0, /*Idx1=*/0);
8713     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8714         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8715         Info.MapTypesArray,
8716         /*Idx0=*/0,
8717         /*Idx1=*/0);
8718   } else {
8719     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8720     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8721     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8722     MapTypesArrayArg =
8723         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8724   }
8725 }
8726 
8727 /// Check for inner distribute directive.
8728 static const OMPExecutableDirective *
8729 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
8730   const auto *CS = D.getInnermostCapturedStmt();
8731   const auto *Body =
8732       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
8733   const Stmt *ChildStmt =
8734       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8735 
8736   if (const auto *NestedDir =
8737           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8738     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
8739     switch (D.getDirectiveKind()) {
8740     case OMPD_target:
8741       if (isOpenMPDistributeDirective(DKind))
8742         return NestedDir;
8743       if (DKind == OMPD_teams) {
8744         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
8745             /*IgnoreCaptured=*/true);
8746         if (!Body)
8747           return nullptr;
8748         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8749         if (const auto *NND =
8750                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8751           DKind = NND->getDirectiveKind();
8752           if (isOpenMPDistributeDirective(DKind))
8753             return NND;
8754         }
8755       }
8756       return nullptr;
8757     case OMPD_target_teams:
8758       if (isOpenMPDistributeDirective(DKind))
8759         return NestedDir;
8760       return nullptr;
8761     case OMPD_target_parallel:
8762     case OMPD_target_simd:
8763     case OMPD_target_parallel_for:
8764     case OMPD_target_parallel_for_simd:
8765       return nullptr;
8766     case OMPD_target_teams_distribute:
8767     case OMPD_target_teams_distribute_simd:
8768     case OMPD_target_teams_distribute_parallel_for:
8769     case OMPD_target_teams_distribute_parallel_for_simd:
8770     case OMPD_parallel:
8771     case OMPD_for:
8772     case OMPD_parallel_for:
8773     case OMPD_parallel_master:
8774     case OMPD_parallel_sections:
8775     case OMPD_for_simd:
8776     case OMPD_parallel_for_simd:
8777     case OMPD_cancel:
8778     case OMPD_cancellation_point:
8779     case OMPD_ordered:
8780     case OMPD_threadprivate:
8781     case OMPD_allocate:
8782     case OMPD_task:
8783     case OMPD_simd:
8784     case OMPD_sections:
8785     case OMPD_section:
8786     case OMPD_single:
8787     case OMPD_master:
8788     case OMPD_critical:
8789     case OMPD_taskyield:
8790     case OMPD_barrier:
8791     case OMPD_taskwait:
8792     case OMPD_taskgroup:
8793     case OMPD_atomic:
8794     case OMPD_flush:
8795     case OMPD_teams:
8796     case OMPD_target_data:
8797     case OMPD_target_exit_data:
8798     case OMPD_target_enter_data:
8799     case OMPD_distribute:
8800     case OMPD_distribute_simd:
8801     case OMPD_distribute_parallel_for:
8802     case OMPD_distribute_parallel_for_simd:
8803     case OMPD_teams_distribute:
8804     case OMPD_teams_distribute_simd:
8805     case OMPD_teams_distribute_parallel_for:
8806     case OMPD_teams_distribute_parallel_for_simd:
8807     case OMPD_target_update:
8808     case OMPD_declare_simd:
8809     case OMPD_declare_variant:
8810     case OMPD_declare_target:
8811     case OMPD_end_declare_target:
8812     case OMPD_declare_reduction:
8813     case OMPD_declare_mapper:
8814     case OMPD_taskloop:
8815     case OMPD_taskloop_simd:
8816     case OMPD_master_taskloop:
8817     case OMPD_master_taskloop_simd:
8818     case OMPD_parallel_master_taskloop:
8819     case OMPD_parallel_master_taskloop_simd:
8820     case OMPD_requires:
8821     case OMPD_unknown:
8822       llvm_unreachable("Unexpected directive.");
8823     }
8824   }
8825 
8826   return nullptr;
8827 }
8828 
8829 /// Emit the user-defined mapper function. The code generation follows the
8830 /// pattern in the example below.
8831 /// \code
8832 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
8833 ///                                           void *base, void *begin,
8834 ///                                           int64_t size, int64_t type) {
8835 ///   // Allocate space for an array section first.
8836 ///   if (size > 1 && !maptype.IsDelete)
8837 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
8838 ///                                 size*sizeof(Ty), clearToFrom(type));
8839 ///   // Map members.
8840 ///   for (unsigned i = 0; i < size; i++) {
8841 ///     // For each component specified by this mapper:
8842 ///     for (auto c : all_components) {
8843 ///       if (c.hasMapper())
8844 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
8845 ///                       c.arg_type);
8846 ///       else
8847 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
8848 ///                                     c.arg_begin, c.arg_size, c.arg_type);
8849 ///     }
8850 ///   }
8851 ///   // Delete the array section.
8852 ///   if (size > 1 && maptype.IsDelete)
8853 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
8854 ///                                 size*sizeof(Ty), clearToFrom(type));
8855 /// }
8856 /// \endcode
8857 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
8858                                             CodeGenFunction *CGF) {
8859   if (UDMMap.count(D) > 0)
8860     return;
8861   ASTContext &C = CGM.getContext();
8862   QualType Ty = D->getType();
8863   QualType PtrTy = C.getPointerType(Ty).withRestrict();
8864   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
8865   auto *MapperVarDecl =
8866       cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
8867   SourceLocation Loc = D->getLocation();
8868   CharUnits ElementSize = C.getTypeSizeInChars(Ty);
8869 
8870   // Prepare mapper function arguments and attributes.
8871   ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
8872                               C.VoidPtrTy, ImplicitParamDecl::Other);
8873   ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
8874                             ImplicitParamDecl::Other);
8875   ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
8876                              C.VoidPtrTy, ImplicitParamDecl::Other);
8877   ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
8878                             ImplicitParamDecl::Other);
8879   ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
8880                             ImplicitParamDecl::Other);
8881   FunctionArgList Args;
8882   Args.push_back(&HandleArg);
8883   Args.push_back(&BaseArg);
8884   Args.push_back(&BeginArg);
8885   Args.push_back(&SizeArg);
8886   Args.push_back(&TypeArg);
8887   const CGFunctionInfo &FnInfo =
8888       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
8889   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
8890   SmallString<64> TyStr;
8891   llvm::raw_svector_ostream Out(TyStr);
8892   CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
8893   std::string Name = getName({"omp_mapper", TyStr, D->getName()});
8894   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
8895                                     Name, &CGM.getModule());
8896   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
8897   Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
8898   // Start the mapper function code generation.
8899   CodeGenFunction MapperCGF(CGM);
8900   MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
8901   // Compute the starting and end addreses of array elements.
8902   llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
8903       MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
8904       C.getPointerType(Int64Ty), Loc);
8905   llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
8906       MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
8907       CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
8908   llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
8909   llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
8910       MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
8911       C.getPointerType(Int64Ty), Loc);
8912   // Prepare common arguments for array initiation and deletion.
8913   llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
8914       MapperCGF.GetAddrOfLocalVar(&HandleArg),
8915       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
8916   llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
8917       MapperCGF.GetAddrOfLocalVar(&BaseArg),
8918       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
8919   llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
8920       MapperCGF.GetAddrOfLocalVar(&BeginArg),
8921       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
8922 
8923   // Emit array initiation if this is an array section and \p MapType indicates
8924   // that memory allocation is required.
8925   llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
8926   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
8927                              ElementSize, HeadBB, /*IsInit=*/true);
8928 
8929   // Emit a for loop to iterate through SizeArg of elements and map all of them.
8930 
8931   // Emit the loop header block.
8932   MapperCGF.EmitBlock(HeadBB);
8933   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
8934   llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
8935   // Evaluate whether the initial condition is satisfied.
8936   llvm::Value *IsEmpty =
8937       MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
8938   MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
8939   llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
8940 
8941   // Emit the loop body block.
8942   MapperCGF.EmitBlock(BodyBB);
8943   llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
8944       PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
8945   PtrPHI->addIncoming(PtrBegin, EntryBB);
8946   Address PtrCurrent =
8947       Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
8948                           .getAlignment()
8949                           .alignmentOfArrayElement(ElementSize));
8950   // Privatize the declared variable of mapper to be the current array element.
8951   CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
8952   Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
8953     return MapperCGF
8954         .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
8955         .getAddress(MapperCGF);
8956   });
8957   (void)Scope.Privatize();
8958 
8959   // Get map clause information. Fill up the arrays with all mapped variables.
8960   MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
8961   MappableExprsHandler::MapValuesArrayTy Pointers;
8962   MappableExprsHandler::MapValuesArrayTy Sizes;
8963   MappableExprsHandler::MapFlagsArrayTy MapTypes;
8964   MappableExprsHandler MEHandler(*D, MapperCGF);
8965   MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes);
8966 
8967   // Call the runtime API __tgt_mapper_num_components to get the number of
8968   // pre-existing components.
8969   llvm::Value *OffloadingArgs[] = {Handle};
8970   llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
8971       createRuntimeFunction(OMPRTL__tgt_mapper_num_components), OffloadingArgs);
8972   llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
8973       PreviousSize,
8974       MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
8975 
8976   // Fill up the runtime mapper handle for all components.
8977   for (unsigned I = 0; I < BasePointers.size(); ++I) {
8978     llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
8979         *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
8980     llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
8981         Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
8982     llvm::Value *CurSizeArg = Sizes[I];
8983 
8984     // Extract the MEMBER_OF field from the map type.
8985     llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
8986     MapperCGF.EmitBlock(MemberBB);
8987     llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]);
8988     llvm::Value *Member = MapperCGF.Builder.CreateAnd(
8989         OriMapType,
8990         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
8991     llvm::BasicBlock *MemberCombineBB =
8992         MapperCGF.createBasicBlock("omp.member.combine");
8993     llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
8994     llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
8995     MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
8996     // Add the number of pre-existing components to the MEMBER_OF field if it
8997     // is valid.
8998     MapperCGF.EmitBlock(MemberCombineBB);
8999     llvm::Value *CombinedMember =
9000         MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9001     // Do nothing if it is not a member of previous components.
9002     MapperCGF.EmitBlock(TypeBB);
9003     llvm::PHINode *MemberMapType =
9004         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
9005     MemberMapType->addIncoming(OriMapType, MemberBB);
9006     MemberMapType->addIncoming(CombinedMember, MemberCombineBB);
9007 
9008     // Combine the map type inherited from user-defined mapper with that
9009     // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
9010     // bits of the \a MapType, which is the input argument of the mapper
9011     // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
9012     // bits of MemberMapType.
9013     // [OpenMP 5.0], 1.2.6. map-type decay.
9014     //        | alloc |  to   | from  | tofrom | release | delete
9015     // ----------------------------------------------------------
9016     // alloc  | alloc | alloc | alloc | alloc  | release | delete
9017     // to     | alloc |  to   | alloc |   to   | release | delete
9018     // from   | alloc | alloc | from  |  from  | release | delete
9019     // tofrom | alloc |  to   | from  | tofrom | release | delete
9020     llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
9021         MapType,
9022         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
9023                                    MappableExprsHandler::OMP_MAP_FROM));
9024     llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
9025     llvm::BasicBlock *AllocElseBB =
9026         MapperCGF.createBasicBlock("omp.type.alloc.else");
9027     llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
9028     llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
9029     llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
9030     llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
9031     llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
9032     MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9033     // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
9034     MapperCGF.EmitBlock(AllocBB);
9035     llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
9036         MemberMapType,
9037         MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9038                                      MappableExprsHandler::OMP_MAP_FROM)));
9039     MapperCGF.Builder.CreateBr(EndBB);
9040     MapperCGF.EmitBlock(AllocElseBB);
9041     llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
9042         LeftToFrom,
9043         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
9044     MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9045     // In case of to, clear OMP_MAP_FROM.
9046     MapperCGF.EmitBlock(ToBB);
9047     llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
9048         MemberMapType,
9049         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
9050     MapperCGF.Builder.CreateBr(EndBB);
9051     MapperCGF.EmitBlock(ToElseBB);
9052     llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
9053         LeftToFrom,
9054         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
9055     MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9056     // In case of from, clear OMP_MAP_TO.
9057     MapperCGF.EmitBlock(FromBB);
9058     llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
9059         MemberMapType,
9060         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
9061     // In case of tofrom, do nothing.
9062     MapperCGF.EmitBlock(EndBB);
9063     llvm::PHINode *CurMapType =
9064         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
9065     CurMapType->addIncoming(AllocMapType, AllocBB);
9066     CurMapType->addIncoming(ToMapType, ToBB);
9067     CurMapType->addIncoming(FromMapType, FromBB);
9068     CurMapType->addIncoming(MemberMapType, ToElseBB);
9069 
9070     // TODO: call the corresponding mapper function if a user-defined mapper is
9071     // associated with this map clause.
9072     // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9073     // data structure.
9074     llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
9075                                      CurSizeArg, CurMapType};
9076     MapperCGF.EmitRuntimeCall(
9077         createRuntimeFunction(OMPRTL__tgt_push_mapper_component),
9078         OffloadingArgs);
9079   }
9080 
9081   // Update the pointer to point to the next element that needs to be mapped,
9082   // and check whether we have mapped all elements.
9083   llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
9084       PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
9085   PtrPHI->addIncoming(PtrNext, BodyBB);
9086   llvm::Value *IsDone =
9087       MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9088   llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
9089   MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9090 
9091   MapperCGF.EmitBlock(ExitBB);
9092   // Emit array deletion if this is an array section and \p MapType indicates
9093   // that deletion is required.
9094   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9095                              ElementSize, DoneBB, /*IsInit=*/false);
9096 
9097   // Emit the function exit block.
9098   MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9099   MapperCGF.FinishFunction();
9100   UDMMap.try_emplace(D, Fn);
9101   if (CGF) {
9102     auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
9103     Decls.second.push_back(D);
9104   }
9105 }
9106 
9107 /// Emit the array initialization or deletion portion for user-defined mapper
9108 /// code generation. First, it evaluates whether an array section is mapped and
9109 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9110 /// true, and \a MapType indicates to not delete this array, array
9111 /// initialization code is generated. If \a IsInit is false, and \a MapType
9112 /// indicates to not this array, array deletion code is generated.
9113 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9114     CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9115     llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9116     CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
9117   StringRef Prefix = IsInit ? ".init" : ".del";
9118 
9119   // Evaluate if this is an array section.
9120   llvm::BasicBlock *IsDeleteBB =
9121       MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"}));
9122   llvm::BasicBlock *BodyBB =
9123       MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
9124   llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
9125       Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9126   MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);
9127 
9128   // Evaluate if we are going to delete this section.
9129   MapperCGF.EmitBlock(IsDeleteBB);
9130   llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9131       MapType,
9132       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
9133   llvm::Value *DeleteCond;
9134   if (IsInit) {
9135     DeleteCond = MapperCGF.Builder.CreateIsNull(
9136         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9137   } else {
9138     DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9139         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9140   }
9141   MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);
9142 
9143   MapperCGF.EmitBlock(BodyBB);
9144   // Get the array size by multiplying element size and element number (i.e., \p
9145   // Size).
9146   llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9147       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9148   // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
9149   // memory allocation/deletion purpose only.
9150   llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
9151       MapType,
9152       MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9153                                    MappableExprsHandler::OMP_MAP_FROM)));
9154   // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9155   // data structure.
9156   llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
9157   MapperCGF.EmitRuntimeCall(
9158       createRuntimeFunction(OMPRTL__tgt_push_mapper_component), OffloadingArgs);
9159 }
9160 
9161 void CGOpenMPRuntime::emitTargetNumIterationsCall(
9162     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9163     llvm::Value *DeviceID,
9164     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9165                                      const OMPLoopDirective &D)>
9166         SizeEmitter) {
9167   OpenMPDirectiveKind Kind = D.getDirectiveKind();
9168   const OMPExecutableDirective *TD = &D;
9169   // Get nested teams distribute kind directive, if any.
9170   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
9171     TD = getNestedDistributeDirective(CGM.getContext(), D);
9172   if (!TD)
9173     return;
9174   const auto *LD = cast<OMPLoopDirective>(TD);
9175   auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF,
9176                                                      PrePostActionTy &) {
9177     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
9178       llvm::Value *Args[] = {DeviceID, NumIterations};
9179       CGF.EmitRuntimeCall(
9180           createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
9181     }
9182   };
9183   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
9184 }
9185 
/// Emit the host-side code that launches the target region of directive \p D.
///
/// The captured variables of the 'target' captured statement are generated
/// first. When \p OutlinedFnID is non-null, the offloading arrays are filled
/// and one of the __tgt_target* runtime entry points is called; if that call
/// returns a non-null value, a call to the host version \p OutlinedFn is
/// emitted as a fallback. When \p OutlinedFnID is null, only the call to
/// \p OutlinedFn is emitted.
///
/// \param IfCond Optional 'if' clause condition. When present (and
/// \p OutlinedFnID is non-null) it is handed to emitIfClause together with the
/// offloading and the host-only code generators.
/// \param Device Optional 'device' clause expression. When absent,
/// OMP_DEVICEID_UNDEF is passed to the runtime as the device id.
/// \param SizeEmitter Callback forwarded to emitTargetNumIterationsCall.
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    const Expr *Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // With a 'depend' clause the target code is emitted through
  // EmitOMPTargetTaskBasedDirective (see below) instead of inline.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // InputInfo and MapTypesArray are filled in by TargetThenGen and consumed by
  // ThenGen; both lambdas capture them by reference.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any; otherwise use the "undefined device" constant.
    llvm::Value *DeviceID;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    // A 'nowait' clause selects the *_nowait variant of the runtime entry
    // point below.
    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
                                          : OMPRTL__tgt_target_teams),
          OffloadingArgs);
    } else {
      // No teams information: same arguments minus NumTeams/NumThreads.
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
                                          : OMPRTL__tgt_target),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required. A
    // non-null return value is treated as an offloading failure.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    // In the task-based case the captured variables are regenerated with the
    // CodeGenFunction this lambda is invoked with.
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Offloading path: build the map information for every capture, emit the
  // offloading arrays, then run ThenGen (inline or inside an outer task).
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;

    // Walk the captures, the captured record fields and the generated captured
    // values in lock step.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
      MappableExprsHandler::MapValuesArrayTy CurPointers;
      MappableExprsHandler::MapValuesArrayTy CurSizes;
      MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurBasePointers.push_back(*CV);
        CurPointers.push_back(*CV);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                              MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                              MappableExprsHandler::OMP_MAP_IMPLICIT);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
                                         CurSizes, CurMapTypes, PartialStruct);
        if (CurBasePointers.empty())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
                                           CurPointers, CurSizes, CurMapTypes);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(
              CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
              CurMapTypes, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert(!CurBasePointers.empty() &&
             "Non-existing map pointer for capture!");
      assert(CurBasePointers.size() == CurPointers.size() &&
             CurBasePointers.size() == CurSizes.size() &&
             CurBasePointers.size() == CurMapTypes.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
                                    CurMapTypes, PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
                                              Pointers, MapTypes);
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
                                               MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Publish the emitted arrays to ThenGen through the shared InputInfo and
    // MapTypesArray.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // Host-only path: run ElseGen inline or inside an outer task. No offloading
  // arrays are needed, so a default-constructed InputInfo is passed.
  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user does not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
9463 
9464 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
9465                                                     StringRef ParentName) {
9466   if (!S)
9467     return;
9468 
9469   // Codegen OMP target directives that offload compute to the device.
9470   bool RequiresDeviceCodegen =
9471       isa<OMPExecutableDirective>(S) &&
9472       isOpenMPTargetExecutionDirective(
9473           cast<OMPExecutableDirective>(S)->getDirectiveKind());
9474 
9475   if (RequiresDeviceCodegen) {
9476     const auto &E = *cast<OMPExecutableDirective>(S);
9477     unsigned DeviceID;
9478     unsigned FileID;
9479     unsigned Line;
9480     getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
9481                              FileID, Line);
9482 
9483     // Is this a target region that should not be emitted as an entry point? If
9484     // so just signal we are done with this target region.
9485     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
9486                                                             ParentName, Line))
9487       return;
9488 
9489     switch (E.getDirectiveKind()) {
9490     case OMPD_target:
9491       CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
9492                                                    cast<OMPTargetDirective>(E));
9493       break;
9494     case OMPD_target_parallel:
9495       CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
9496           CGM, ParentName, cast<OMPTargetParallelDirective>(E));
9497       break;
9498     case OMPD_target_teams:
9499       CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
9500           CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
9501       break;
9502     case OMPD_target_teams_distribute:
9503       CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
9504           CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
9505       break;
9506     case OMPD_target_teams_distribute_simd:
9507       CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
9508           CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
9509       break;
9510     case OMPD_target_parallel_for:
9511       CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
9512           CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
9513       break;
9514     case OMPD_target_parallel_for_simd:
9515       CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
9516           CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
9517       break;
9518     case OMPD_target_simd:
9519       CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
9520           CGM, ParentName, cast<OMPTargetSimdDirective>(E));
9521       break;
9522     case OMPD_target_teams_distribute_parallel_for:
9523       CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
9524           CGM, ParentName,
9525           cast<OMPTargetTeamsDistributeParallelForDirective>(E));
9526       break;
9527     case OMPD_target_teams_distribute_parallel_for_simd:
9528       CodeGenFunction::
9529           EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
9530               CGM, ParentName,
9531               cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
9532       break;
9533     case OMPD_parallel:
9534     case OMPD_for:
9535     case OMPD_parallel_for:
9536     case OMPD_parallel_master:
9537     case OMPD_parallel_sections:
9538     case OMPD_for_simd:
9539     case OMPD_parallel_for_simd:
9540     case OMPD_cancel:
9541     case OMPD_cancellation_point:
9542     case OMPD_ordered:
9543     case OMPD_threadprivate:
9544     case OMPD_allocate:
9545     case OMPD_task:
9546     case OMPD_simd:
9547     case OMPD_sections:
9548     case OMPD_section:
9549     case OMPD_single:
9550     case OMPD_master:
9551     case OMPD_critical:
9552     case OMPD_taskyield:
9553     case OMPD_barrier:
9554     case OMPD_taskwait:
9555     case OMPD_taskgroup:
9556     case OMPD_atomic:
9557     case OMPD_flush:
9558     case OMPD_teams:
9559     case OMPD_target_data:
9560     case OMPD_target_exit_data:
9561     case OMPD_target_enter_data:
9562     case OMPD_distribute:
9563     case OMPD_distribute_simd:
9564     case OMPD_distribute_parallel_for:
9565     case OMPD_distribute_parallel_for_simd:
9566     case OMPD_teams_distribute:
9567     case OMPD_teams_distribute_simd:
9568     case OMPD_teams_distribute_parallel_for:
9569     case OMPD_teams_distribute_parallel_for_simd:
9570     case OMPD_target_update:
9571     case OMPD_declare_simd:
9572     case OMPD_declare_variant:
9573     case OMPD_declare_target:
9574     case OMPD_end_declare_target:
9575     case OMPD_declare_reduction:
9576     case OMPD_declare_mapper:
9577     case OMPD_taskloop:
9578     case OMPD_taskloop_simd:
9579     case OMPD_master_taskloop:
9580     case OMPD_master_taskloop_simd:
9581     case OMPD_parallel_master_taskloop:
9582     case OMPD_parallel_master_taskloop_simd:
9583     case OMPD_requires:
9584     case OMPD_unknown:
9585       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
9586     }
9587     return;
9588   }
9589 
9590   if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
9591     if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
9592       return;
9593 
9594     scanForTargetRegionsFunctions(
9595         E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
9596     return;
9597   }
9598 
9599   // If this is a lambda function, look into its body.
9600   if (const auto *L = dyn_cast<LambdaExpr>(S))
9601     S = L->getBody();
9602 
9603   // Keep looking for target regions recursively.
9604   for (const Stmt *II : S->children())
9605     scanForTargetRegionsFunctions(II, ParentName);
9606 }
9607 
9608 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9609   // If emitting code for the host, we do not process FD here. Instead we do
9610   // the normal code generation.
9611   if (!CGM.getLangOpts().OpenMPIsDevice) {
9612     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
9613       Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9614           OMPDeclareTargetDeclAttr::getDeviceType(FD);
9615       // Do not emit device_type(nohost) functions for the host.
9616       if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9617         return true;
9618     }
9619     return false;
9620   }
9621 
9622   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9623   // Try to detect target regions in the function.
9624   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
9625     StringRef Name = CGM.getMangledName(GD);
9626     scanForTargetRegionsFunctions(FD->getBody(), Name);
9627     Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9628         OMPDeclareTargetDeclAttr::getDeviceType(FD);
9629     // Do not emit device_type(nohost) functions for the host.
9630     if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9631       return true;
9632   }
9633 
9634   // Do not to emit function if it is not marked as declare target.
9635   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9636          AlreadyEmittedTargetDecls.count(VD) == 0;
9637 }
9638 
9639 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9640   if (!CGM.getLangOpts().OpenMPIsDevice)
9641     return false;
9642 
9643   // Check if there are Ctors/Dtors in this declaration and look for target
9644   // regions in it. We use the complete variant to produce the kernel name
9645   // mangling.
9646   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9647   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9648     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9649       StringRef ParentName =
9650           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9651       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9652     }
9653     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9654       StringRef ParentName =
9655           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9656       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9657     }
9658   }
9659 
9660   // Do not to emit variable if it is not marked as declare target.
9661   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9662       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9663           cast<VarDecl>(GD.getDecl()));
9664   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9665       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9666        HasRequiresUnifiedSharedMemory)) {
9667     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9668     return true;
9669   }
9670   return false;
9671 }
9672 
9673 llvm::Constant *
9674 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
9675                                                 const VarDecl *VD) {
9676   assert(VD->getType().isConstant(CGM.getContext()) &&
9677          "Expected constant variable.");
9678   StringRef VarName;
9679   llvm::Constant *Addr;
9680   llvm::GlobalValue::LinkageTypes Linkage;
9681   QualType Ty = VD->getType();
9682   SmallString<128> Buffer;
9683   {
9684     unsigned DeviceID;
9685     unsigned FileID;
9686     unsigned Line;
9687     getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
9688                              FileID, Line);
9689     llvm::raw_svector_ostream OS(Buffer);
9690     OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
9691        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
9692     VarName = OS.str();
9693   }
9694   Linkage = llvm::GlobalValue::InternalLinkage;
9695   Addr =
9696       getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
9697                                   getDefaultFirstprivateAddressSpace());
9698   cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
9699   CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
9700   CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
9701   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9702       VarName, Addr, VarSize,
9703       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
9704   return Addr;
9705 }
9706 
9707 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
9708                                                    llvm::Constant *Addr) {
9709   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
9710       !CGM.getLangOpts().OpenMPIsDevice)
9711     return;
9712   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9713       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9714   if (!Res) {
9715     if (CGM.getLangOpts().OpenMPIsDevice) {
9716       // Register non-target variables being emitted in device code (debug info
9717       // may cause this).
9718       StringRef VarName = CGM.getMangledName(VD);
9719       EmittedNonTargetVariables.try_emplace(VarName, Addr);
9720     }
9721     return;
9722   }
9723   // Register declare target variables.
9724   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
9725   StringRef VarName;
9726   CharUnits VarSize;
9727   llvm::GlobalValue::LinkageTypes Linkage;
9728 
9729   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9730       !HasRequiresUnifiedSharedMemory) {
9731     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9732     VarName = CGM.getMangledName(VD);
9733     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
9734       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
9735       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
9736     } else {
9737       VarSize = CharUnits::Zero();
9738     }
9739     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
9740     // Temp solution to prevent optimizations of the internal variables.
9741     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
9742       std::string RefName = getName({VarName, "ref"});
9743       if (!CGM.GetGlobalValue(RefName)) {
9744         llvm::Constant *AddrRef =
9745             getOrCreateInternalVariable(Addr->getType(), RefName);
9746         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
9747         GVAddrRef->setConstant(/*Val=*/true);
9748         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
9749         GVAddrRef->setInitializer(Addr);
9750         CGM.addCompilerUsedGlobal(GVAddrRef);
9751       }
9752     }
9753   } else {
9754     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
9755             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9756              HasRequiresUnifiedSharedMemory)) &&
9757            "Declare target attribute must link or to with unified memory.");
9758     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
9759       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
9760     else
9761       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9762 
9763     if (CGM.getLangOpts().OpenMPIsDevice) {
9764       VarName = Addr->getName();
9765       Addr = nullptr;
9766     } else {
9767       VarName = getAddrOfDeclareTargetVar(VD).getName();
9768       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
9769     }
9770     VarSize = CGM.getPointerSize();
9771     Linkage = llvm::GlobalValue::WeakAnyLinkage;
9772   }
9773 
9774   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9775       VarName, Addr, VarSize, Flags, Linkage);
9776 }
9777 
9778 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
9779   if (isa<FunctionDecl>(GD.getDecl()) ||
9780       isa<OMPDeclareReductionDecl>(GD.getDecl()))
9781     return emitTargetFunctions(GD);
9782 
9783   return emitTargetGlobalVariable(GD);
9784 }
9785 
9786 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
9787   for (const VarDecl *VD : DeferredGlobalVariables) {
9788     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9789         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9790     if (!Res)
9791       continue;
9792     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9793         !HasRequiresUnifiedSharedMemory) {
9794       CGM.EmitGlobal(VD);
9795     } else {
9796       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
9797               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9798                HasRequiresUnifiedSharedMemory)) &&
9799              "Expected link clause or to clause with unified memory.");
9800       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
9801     }
9802   }
9803 }
9804 
/// Base implementation: performs no adjustment. It only checks (in builds with
/// assertions enabled) that \p D is a target execution directive.
void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}
9810 
9811 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
9812   for (const OMPClause *Clause : D->clauselists()) {
9813     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
9814       HasRequiresUnifiedSharedMemory = true;
9815     } else if (const auto *AC =
9816                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
9817       switch (AC->getAtomicDefaultMemOrderKind()) {
9818       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
9819         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
9820         break;
9821       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
9822         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
9823         break;
9824       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
9825         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
9826         break;
9827       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
9828         break;
9829       }
9830     }
9831   }
9832 }
9833 
/// Return the current value of RequiresAtomicOrdering, which
/// processRequiresDirective updates from an atomic default memory order
/// clause.
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
9837 
9838 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
9839                                                        LangAS &AS) {
9840   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
9841     return false;
9842   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
9843   switch(A->getAllocatorType()) {
9844   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
9845   // Not supported, fallback to the default mem space.
9846   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
9847   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
9848   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
9849   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
9850   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
9851   case OMPAllocateDeclAttr::OMPConstMemAlloc:
9852   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
9853     AS = LangAS::Default;
9854     return true;
9855   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
9856     llvm_unreachable("Expected predefined allocator for the variables with the "
9857                      "static storage.");
9858   }
9859   return false;
9860 }
9861 
/// Return the HasRequiresUnifiedSharedMemory flag, which
/// processRequiresDirective sets when it sees a 'unified_shared_memory'
/// clause.
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
9865 
9866 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
9867     CodeGenModule &CGM)
9868     : CGM(CGM) {
9869   if (CGM.getLangOpts().OpenMPIsDevice) {
9870     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
9871     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
9872   }
9873 }
9874 
9875 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
9876   if (CGM.getLangOpts().OpenMPIsDevice)
9877     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
9878 }
9879 
9880 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
9881   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
9882     return true;
9883 
9884   const auto *D = cast<FunctionDecl>(GD.getDecl());
9885   // Do not to emit function if it is marked as declare target as it was already
9886   // emitted.
9887   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
9888     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
9889       if (auto *F = dyn_cast_or_null<llvm::Function>(
9890               CGM.GetGlobalValue(CGM.getMangledName(GD))))
9891         return !F->isDeclaration();
9892       return false;
9893     }
9894     return true;
9895   }
9896 
9897   return !AlreadyEmittedTargetDecls.insert(D).second;
9898 }
9899 
9900 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
9901   // If we don't have entries or if we are emitting code for the device, we
9902   // don't need to do anything.
9903   if (CGM.getLangOpts().OMPTargetTriples.empty() ||
9904       CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
9905       (OffloadEntriesInfoManager.empty() &&
9906        !HasEmittedDeclareTargetRegion &&
9907        !HasEmittedTargetRegion))
9908     return nullptr;
9909 
9910   // Create and register the function that handles the requires directives.
9911   ASTContext &C = CGM.getContext();
9912 
9913   llvm::Function *RequiresRegFn;
9914   {
9915     CodeGenFunction CGF(CGM);
9916     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
9917     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
9918     std::string ReqName = getName({"omp_offloading", "requires_reg"});
9919     RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI);
9920     CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
9921     OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
9922     // TODO: check for other requires clauses.
9923     // The requires directive takes effect only when a target region is
9924     // present in the compilation unit. Otherwise it is ignored and not
9925     // passed to the runtime. This avoids the runtime from throwing an error
9926     // for mismatching requires clauses across compilation units that don't
9927     // contain at least 1 target region.
9928     assert((HasEmittedTargetRegion ||
9929             HasEmittedDeclareTargetRegion ||
9930             !OffloadEntriesInfoManager.empty()) &&
9931            "Target or declare target region expected.");
9932     if (HasRequiresUnifiedSharedMemory)
9933       Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
9934     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires),
9935         llvm::ConstantInt::get(CGM.Int64Ty, Flags));
9936     CGF.FinishFunction();
9937   }
9938   return RequiresRegFn;
9939 }
9940 
9941 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
9942                                     const OMPExecutableDirective &D,
9943                                     SourceLocation Loc,
9944                                     llvm::Function *OutlinedFn,
9945                                     ArrayRef<llvm::Value *> CapturedVars) {
9946   if (!CGF.HaveInsertPoint())
9947     return;
9948 
9949   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9950   CodeGenFunction::RunCleanupsScope Scope(CGF);
9951 
9952   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
9953   llvm::Value *Args[] = {
9954       RTLoc,
9955       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
9956       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
9957   llvm::SmallVector<llvm::Value *, 16> RealArgs;
9958   RealArgs.append(std::begin(Args), std::end(Args));
9959   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
9960 
9961   llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
9962   CGF.EmitRuntimeCall(RTLFn, RealArgs);
9963 }
9964 
9965 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
9966                                          const Expr *NumTeams,
9967                                          const Expr *ThreadLimit,
9968                                          SourceLocation Loc) {
9969   if (!CGF.HaveInsertPoint())
9970     return;
9971 
9972   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9973 
9974   llvm::Value *NumTeamsVal =
9975       NumTeams
9976           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
9977                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
9978           : CGF.Builder.getInt32(0);
9979 
9980   llvm::Value *ThreadLimitVal =
9981       ThreadLimit
9982           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
9983                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
9984           : CGF.Builder.getInt32(0);
9985 
9986   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
9987   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
9988                                      ThreadLimitVal};
9989   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
9990                       PushNumTeamsArgs);
9991 }
9992 
9993 void CGOpenMPRuntime::emitTargetDataCalls(
9994     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
9995     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
9996   if (!CGF.HaveInsertPoint())
9997     return;
9998 
9999   // Action used to replace the default codegen action and turn privatization
10000   // off.
10001   PrePostActionTy NoPrivAction;
10002 
10003   // Generate the code for the opening of the data environment. Capture all the
10004   // arguments of the runtime call by reference because they are used in the
10005   // closing of the region.
10006   auto &&BeginThenGen = [this, &D, Device, &Info,
10007                          &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
10008     // Fill up the arrays with all the mapped variables.
10009     MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
10010     MappableExprsHandler::MapValuesArrayTy Pointers;
10011     MappableExprsHandler::MapValuesArrayTy Sizes;
10012     MappableExprsHandler::MapFlagsArrayTy MapTypes;
10013 
10014     // Get map clause information.
10015     MappableExprsHandler MCHandler(D, CGF);
10016     MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
10017 
10018     // Fill up the arrays and create the arguments.
10019     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
10020 
10021     llvm::Value *BasePointersArrayArg = nullptr;
10022     llvm::Value *PointersArrayArg = nullptr;
10023     llvm::Value *SizesArrayArg = nullptr;
10024     llvm::Value *MapTypesArrayArg = nullptr;
10025     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
10026                                  SizesArrayArg, MapTypesArrayArg, Info);
10027 
10028     // Emit device ID if any.
10029     llvm::Value *DeviceID = nullptr;
10030     if (Device) {
10031       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10032                                            CGF.Int64Ty, /*isSigned=*/true);
10033     } else {
10034       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10035     }
10036 
10037     // Emit the number of elements in the offloading arrays.
10038     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
10039 
10040     llvm::Value *OffloadingArgs[] = {
10041         DeviceID,         PointerNum,    BasePointersArrayArg,
10042         PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
10043     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin),
10044                         OffloadingArgs);
10045 
10046     // If device pointer privatization is required, emit the body of the region
10047     // here. It will have to be duplicated: with and without privatization.
10048     if (!Info.CaptureDeviceAddrMap.empty())
10049       CodeGen(CGF);
10050   };
10051 
10052   // Generate code for the closing of the data region.
10053   auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
10054                                             PrePostActionTy &) {
10055     assert(Info.isValid() && "Invalid data environment closing arguments.");
10056 
10057     llvm::Value *BasePointersArrayArg = nullptr;
10058     llvm::Value *PointersArrayArg = nullptr;
10059     llvm::Value *SizesArrayArg = nullptr;
10060     llvm::Value *MapTypesArrayArg = nullptr;
10061     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
10062                                  SizesArrayArg, MapTypesArrayArg, Info);
10063 
10064     // Emit device ID if any.
10065     llvm::Value *DeviceID = nullptr;
10066     if (Device) {
10067       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10068                                            CGF.Int64Ty, /*isSigned=*/true);
10069     } else {
10070       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10071     }
10072 
10073     // Emit the number of elements in the offloading arrays.
10074     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
10075 
10076     llvm::Value *OffloadingArgs[] = {
10077         DeviceID,         PointerNum,    BasePointersArrayArg,
10078         PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
10079     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end),
10080                         OffloadingArgs);
10081   };
10082 
10083   // If we need device pointer privatization, we need to emit the body of the
10084   // region with no privatization in the 'else' branch of the conditional.
10085   // Otherwise, we don't have to do anything.
10086   auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
10087                                                          PrePostActionTy &) {
10088     if (!Info.CaptureDeviceAddrMap.empty()) {
10089       CodeGen.setAction(NoPrivAction);
10090       CodeGen(CGF);
10091     }
10092   };
10093 
10094   // We don't have to do anything to close the region if the if clause evaluates
10095   // to false.
10096   auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
10097 
10098   if (IfCond) {
10099     emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
10100   } else {
10101     RegionCodeGenTy RCG(BeginThenGen);
10102     RCG(CGF);
10103   }
10104 
10105   // If we don't require privatization of device pointers, we emit the body in
10106   // between the runtime calls. This avoids duplicating the body code.
10107   if (Info.CaptureDeviceAddrMap.empty()) {
10108     CodeGen.setAction(NoPrivAction);
10109     CodeGen(CGF);
10110   }
10111 
10112   if (IfCond) {
10113     emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
10114   } else {
10115     RegionCodeGenTy RCG(EndThenGen);
10116     RCG(CGF);
10117   }
10118 }
10119 
10120 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
10121     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10122     const Expr *Device) {
10123   if (!CGF.HaveInsertPoint())
10124     return;
10125 
10126   assert((isa<OMPTargetEnterDataDirective>(D) ||
10127           isa<OMPTargetExitDataDirective>(D) ||
10128           isa<OMPTargetUpdateDirective>(D)) &&
10129          "Expecting either target enter, exit data, or update directives.");
10130 
10131   CodeGenFunction::OMPTargetDataInfo InputInfo;
10132   llvm::Value *MapTypesArray = nullptr;
10133   // Generate the code for the opening of the data environment.
10134   auto &&ThenGen = [this, &D, Device, &InputInfo,
10135                     &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
10136     // Emit device ID if any.
10137     llvm::Value *DeviceID = nullptr;
10138     if (Device) {
10139       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10140                                            CGF.Int64Ty, /*isSigned=*/true);
10141     } else {
10142       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10143     }
10144 
10145     // Emit the number of elements in the offloading arrays.
10146     llvm::Constant *PointerNum =
10147         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10148 
10149     llvm::Value *OffloadingArgs[] = {DeviceID,
10150                                      PointerNum,
10151                                      InputInfo.BasePointersArray.getPointer(),
10152                                      InputInfo.PointersArray.getPointer(),
10153                                      InputInfo.SizesArray.getPointer(),
10154                                      MapTypesArray};
10155 
10156     // Select the right runtime function call for each expected standalone
10157     // directive.
10158     const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10159     OpenMPRTLFunction RTLFn;
10160     switch (D.getDirectiveKind()) {
10161     case OMPD_target_enter_data:
10162       RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
10163                         : OMPRTL__tgt_target_data_begin;
10164       break;
10165     case OMPD_target_exit_data:
10166       RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
10167                         : OMPRTL__tgt_target_data_end;
10168       break;
10169     case OMPD_target_update:
10170       RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
10171                         : OMPRTL__tgt_target_data_update;
10172       break;
10173     case OMPD_parallel:
10174     case OMPD_for:
10175     case OMPD_parallel_for:
10176     case OMPD_parallel_master:
10177     case OMPD_parallel_sections:
10178     case OMPD_for_simd:
10179     case OMPD_parallel_for_simd:
10180     case OMPD_cancel:
10181     case OMPD_cancellation_point:
10182     case OMPD_ordered:
10183     case OMPD_threadprivate:
10184     case OMPD_allocate:
10185     case OMPD_task:
10186     case OMPD_simd:
10187     case OMPD_sections:
10188     case OMPD_section:
10189     case OMPD_single:
10190     case OMPD_master:
10191     case OMPD_critical:
10192     case OMPD_taskyield:
10193     case OMPD_barrier:
10194     case OMPD_taskwait:
10195     case OMPD_taskgroup:
10196     case OMPD_atomic:
10197     case OMPD_flush:
10198     case OMPD_teams:
10199     case OMPD_target_data:
10200     case OMPD_distribute:
10201     case OMPD_distribute_simd:
10202     case OMPD_distribute_parallel_for:
10203     case OMPD_distribute_parallel_for_simd:
10204     case OMPD_teams_distribute:
10205     case OMPD_teams_distribute_simd:
10206     case OMPD_teams_distribute_parallel_for:
10207     case OMPD_teams_distribute_parallel_for_simd:
10208     case OMPD_declare_simd:
10209     case OMPD_declare_variant:
10210     case OMPD_declare_target:
10211     case OMPD_end_declare_target:
10212     case OMPD_declare_reduction:
10213     case OMPD_declare_mapper:
10214     case OMPD_taskloop:
10215     case OMPD_taskloop_simd:
10216     case OMPD_master_taskloop:
10217     case OMPD_master_taskloop_simd:
10218     case OMPD_parallel_master_taskloop:
10219     case OMPD_parallel_master_taskloop_simd:
10220     case OMPD_target:
10221     case OMPD_target_simd:
10222     case OMPD_target_teams_distribute:
10223     case OMPD_target_teams_distribute_simd:
10224     case OMPD_target_teams_distribute_parallel_for:
10225     case OMPD_target_teams_distribute_parallel_for_simd:
10226     case OMPD_target_teams:
10227     case OMPD_target_parallel:
10228     case OMPD_target_parallel_for:
10229     case OMPD_target_parallel_for_simd:
10230     case OMPD_requires:
10231     case OMPD_unknown:
10232       llvm_unreachable("Unexpected standalone target data directive.");
10233       break;
10234     }
10235     CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
10236   };
10237 
10238   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
10239                              CodeGenFunction &CGF, PrePostActionTy &) {
10240     // Fill up the arrays with all the mapped variables.
10241     MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
10242     MappableExprsHandler::MapValuesArrayTy Pointers;
10243     MappableExprsHandler::MapValuesArrayTy Sizes;
10244     MappableExprsHandler::MapFlagsArrayTy MapTypes;
10245 
10246     // Get map clause information.
10247     MappableExprsHandler MEHandler(D, CGF);
10248     MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
10249 
10250     TargetDataInfo Info;
10251     // Fill up the arrays and create the arguments.
10252     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
10253     emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
10254                                  Info.PointersArray, Info.SizesArray,
10255                                  Info.MapTypesArray, Info);
10256     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10257     InputInfo.BasePointersArray =
10258         Address(Info.BasePointersArray, CGM.getPointerAlign());
10259     InputInfo.PointersArray =
10260         Address(Info.PointersArray, CGM.getPointerAlign());
10261     InputInfo.SizesArray =
10262         Address(Info.SizesArray, CGM.getPointerAlign());
10263     MapTypesArray = Info.MapTypesArray;
10264     if (D.hasClausesOfKind<OMPDependClause>())
10265       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10266     else
10267       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10268   };
10269 
10270   if (IfCond) {
10271     emitIfClause(CGF, IfCond, TargetThenGen,
10272                  [](CodeGenFunction &CGF, PrePostActionTy &) {});
10273   } else {
10274     RegionCodeGenTy ThenRCG(TargetThenGen);
10275     ThenRCG(CGF);
10276   }
10277 }
10278 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  // The mangling code in this file maps the kinds to the name tokens
  // 's'/"ls" (LinearWithVarStride), 'l' (Linear), 'u' (Uniform) and
  // 'v' (Vector).
  // Note that LinearWithVarStride is the first enumerator, so a
  // value-initialized ParamKindTy (e.g. written as `{}`) is
  // LinearWithVarStride, not Vector.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    // Classification of the parameter; a default-constructed attribute set
    // describes a Vector parameter.
    ParamKindTy Kind = Vector;
    // Value printed after the linear token when mangling. For Linear it is
    // printed only when non-zero.
    llvm::APSInt StrideOrArg;
    // Value printed after an 'a' token when mangling; omitted when zero.
    llvm::APSInt Alignment;
  };
} // namespace
10289 
10290 static unsigned evaluateCDTSize(const FunctionDecl *FD,
10291                                 ArrayRef<ParamAttrTy> ParamAttrs) {
10292   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10293   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10294   // of that clause. The VLEN value must be power of 2.
10295   // In other case the notion of the function`s "characteristic data type" (CDT)
10296   // is used to compute the vector length.
10297   // CDT is defined in the following order:
10298   //   a) For non-void function, the CDT is the return type.
10299   //   b) If the function has any non-uniform, non-linear parameters, then the
10300   //   CDT is the type of the first such parameter.
10301   //   c) If the CDT determined by a) or b) above is struct, union, or class
10302   //   type which is pass-by-value (except for the type that maps to the
10303   //   built-in complex data type), the characteristic data type is int.
10304   //   d) If none of the above three cases is applicable, the CDT is int.
10305   // The VLEN is then determined based on the CDT and the size of vector
10306   // register of that ISA for which current vector version is generated. The
10307   // VLEN is computed using the formula below:
10308   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
10309   // where vector register size specified in section 3.2.1 Registers and the
10310   // Stack Frame of original AMD64 ABI document.
10311   QualType RetType = FD->getReturnType();
10312   if (RetType.isNull())
10313     return 0;
10314   ASTContext &C = FD->getASTContext();
10315   QualType CDT;
10316   if (!RetType.isNull() && !RetType->isVoidType()) {
10317     CDT = RetType;
10318   } else {
10319     unsigned Offset = 0;
10320     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10321       if (ParamAttrs[Offset].Kind == Vector)
10322         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10323       ++Offset;
10324     }
10325     if (CDT.isNull()) {
10326       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10327         if (ParamAttrs[I + Offset].Kind == Vector) {
10328           CDT = FD->getParamDecl(I)->getType();
10329           break;
10330         }
10331       }
10332     }
10333   }
10334   if (CDT.isNull())
10335     CDT = C.IntTy;
10336   CDT = CDT->getCanonicalTypeUnqualified();
10337   if (CDT->isRecordType() || CDT->isUnionType())
10338     CDT = C.IntTy;
10339   return C.getTypeSize(CDT);
10340 }
10341 
10342 static void
10343 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10344                            const llvm::APSInt &VLENVal,
10345                            ArrayRef<ParamAttrTy> ParamAttrs,
10346                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
10347   struct ISADataTy {
10348     char ISA;
10349     unsigned VecRegSize;
10350   };
10351   ISADataTy ISAData[] = {
10352       {
10353           'b', 128
10354       }, // SSE
10355       {
10356           'c', 256
10357       }, // AVX
10358       {
10359           'd', 256
10360       }, // AVX2
10361       {
10362           'e', 512
10363       }, // AVX512
10364   };
10365   llvm::SmallVector<char, 2> Masked;
10366   switch (State) {
10367   case OMPDeclareSimdDeclAttr::BS_Undefined:
10368     Masked.push_back('N');
10369     Masked.push_back('M');
10370     break;
10371   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10372     Masked.push_back('N');
10373     break;
10374   case OMPDeclareSimdDeclAttr::BS_Inbranch:
10375     Masked.push_back('M');
10376     break;
10377   }
10378   for (char Mask : Masked) {
10379     for (const ISADataTy &Data : ISAData) {
10380       SmallString<256> Buffer;
10381       llvm::raw_svector_ostream Out(Buffer);
10382       Out << "_ZGV" << Data.ISA << Mask;
10383       if (!VLENVal) {
10384         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10385         assert(NumElts && "Non-zero simdlen/cdtsize expected");
10386         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10387       } else {
10388         Out << VLENVal;
10389       }
10390       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
10391         switch (ParamAttr.Kind){
10392         case LinearWithVarStride:
10393           Out << 's' << ParamAttr.StrideOrArg;
10394           break;
10395         case Linear:
10396           Out << 'l';
10397           if (!!ParamAttr.StrideOrArg)
10398             Out << ParamAttr.StrideOrArg;
10399           break;
10400         case Uniform:
10401           Out << 'u';
10402           break;
10403         case Vector:
10404           Out << 'v';
10405           break;
10406         }
10407         if (!!ParamAttr.Alignment)
10408           Out << 'a' << ParamAttr.Alignment;
10409       }
10410       Out << '_' << Fn->getName();
10411       Fn->addFnAttr(Out.str());
10412     }
10413   }
10414 }
10415 
// These are the functions needed to mangle the names of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10421 
10422 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
10423 ///
10424 /// TODO: Need to implement the behavior for reference marked with a
10425 /// var or no linear modifiers (1.b in the section). For this, we
10426 /// need to extend ParamKindTy to support the linear modifiers.
10427 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10428   QT = QT.getCanonicalType();
10429 
10430   if (QT->isVoidType())
10431     return false;
10432 
10433   if (Kind == ParamKindTy::Uniform)
10434     return false;
10435 
10436   if (Kind == ParamKindTy::Linear)
10437     return false;
10438 
10439   // TODO: Handle linear references with modifiers
10440 
10441   if (Kind == ParamKindTy::LinearWithVarStride)
10442     return false;
10443 
10444   return true;
10445 }
10446 
10447 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10448 static bool getAArch64PBV(QualType QT, ASTContext &C) {
10449   QT = QT.getCanonicalType();
10450   unsigned Size = C.getTypeSize(QT);
10451 
10452   // Only scalars and complex within 16 bytes wide set PVB to true.
10453   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10454     return false;
10455 
10456   if (QT->isFloatingType())
10457     return true;
10458 
10459   if (QT->isIntegerType())
10460     return true;
10461 
10462   if (QT->isPointerType())
10463     return true;
10464 
10465   // TODO: Add support for complex types (section 3.1.2, item 2).
10466 
10467   return false;
10468 }
10469 
10470 /// Computes the lane size (LS) of a return type or of an input parameter,
10471 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10472 /// TODO: Add support for references, section 3.2.1, item 1.
10473 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10474   if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10475     QualType PTy = QT.getCanonicalType()->getPointeeType();
10476     if (getAArch64PBV(PTy, C))
10477       return C.getTypeSize(PTy);
10478   }
10479   if (getAArch64PBV(QT, C))
10480     return C.getTypeSize(QT);
10481 
10482   return C.getTypeSize(C.getUIntPtrType());
10483 }
10484 
10485 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10486 // signature of the scalar function, as defined in 3.2.2 of the
10487 // AAVFABI.
10488 static std::tuple<unsigned, unsigned, bool>
10489 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10490   QualType RetType = FD->getReturnType().getCanonicalType();
10491 
10492   ASTContext &C = FD->getASTContext();
10493 
10494   bool OutputBecomesInput = false;
10495 
10496   llvm::SmallVector<unsigned, 8> Sizes;
10497   if (!RetType->isVoidType()) {
10498     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10499     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10500       OutputBecomesInput = true;
10501   }
10502   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10503     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10504     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10505   }
10506 
10507   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10508   // The LS of a function parameter / return value can only be a power
10509   // of 2, starting from 8 bits, up to 128.
10510   assert(std::all_of(Sizes.begin(), Sizes.end(),
10511                      [](unsigned Size) {
10512                        return Size == 8 || Size == 16 || Size == 32 ||
10513                               Size == 64 || Size == 128;
10514                      }) &&
10515          "Invalid size");
10516 
10517   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10518                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
10519                          OutputBecomesInput);
10520 }
10521 
10522 /// Mangle the parameter part of the vector function name according to
10523 /// their OpenMP classification. The mangling function is defined in
10524 /// section 3.5 of the AAVFABI.
10525 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10526   SmallString<256> Buffer;
10527   llvm::raw_svector_ostream Out(Buffer);
10528   for (const auto &ParamAttr : ParamAttrs) {
10529     switch (ParamAttr.Kind) {
10530     case LinearWithVarStride:
10531       Out << "ls" << ParamAttr.StrideOrArg;
10532       break;
10533     case Linear:
10534       Out << 'l';
10535       // Don't print the step value if it is not present or if it is
10536       // equal to 1.
10537       if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1)
10538         Out << ParamAttr.StrideOrArg;
10539       break;
10540     case Uniform:
10541       Out << 'u';
10542       break;
10543     case Vector:
10544       Out << 'v';
10545       break;
10546     }
10547 
10548     if (!!ParamAttr.Alignment)
10549       Out << 'a' << ParamAttr.Alignment;
10550   }
10551 
10552   return std::string(Out.str());
10553 }
10554 
10555 // Function used to add the attribute. The parameter `VLEN` is
10556 // templated to allow the use of "x" when targeting scalable functions
10557 // for SVE.
10558 template <typename T>
10559 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10560                                  char ISA, StringRef ParSeq,
10561                                  StringRef MangledName, bool OutputBecomesInput,
10562                                  llvm::Function *Fn) {
10563   SmallString<256> Buffer;
10564   llvm::raw_svector_ostream Out(Buffer);
10565   Out << Prefix << ISA << LMask << VLEN;
10566   if (OutputBecomesInput)
10567     Out << "v";
10568   Out << ParSeq << "_" << MangledName;
10569   Fn->addFnAttr(Out.str());
10570 }
10571 
10572 // Helper function to generate the Advanced SIMD names depending on
10573 // the value of the NDS when simdlen is not present.
10574 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10575                                       StringRef Prefix, char ISA,
10576                                       StringRef ParSeq, StringRef MangledName,
10577                                       bool OutputBecomesInput,
10578                                       llvm::Function *Fn) {
10579   switch (NDS) {
10580   case 8:
10581     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10582                          OutputBecomesInput, Fn);
10583     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10584                          OutputBecomesInput, Fn);
10585     break;
10586   case 16:
10587     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10588                          OutputBecomesInput, Fn);
10589     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10590                          OutputBecomesInput, Fn);
10591     break;
10592   case 32:
10593     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10594                          OutputBecomesInput, Fn);
10595     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10596                          OutputBecomesInput, Fn);
10597     break;
10598   case 64:
10599   case 128:
10600     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10601                          OutputBecomesInput, Fn);
10602     break;
10603   default:
10604     llvm_unreachable("Scalar type is too wide.");
10605   }
10606 }
10607 
10608 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
10609 static void emitAArch64DeclareSimdFunction(
10610     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
10611     ArrayRef<ParamAttrTy> ParamAttrs,
10612     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
10613     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
10614 
10615   // Get basic data for building the vector signature.
10616   const auto Data = getNDSWDS(FD, ParamAttrs);
10617   const unsigned NDS = std::get<0>(Data);
10618   const unsigned WDS = std::get<1>(Data);
10619   const bool OutputBecomesInput = std::get<2>(Data);
10620 
10621   // Check the values provided via `simdlen` by the user.
10622   // 1. A `simdlen(1)` doesn't produce vector signatures,
10623   if (UserVLEN == 1) {
10624     unsigned DiagID = CGM.getDiags().getCustomDiagID(
10625         DiagnosticsEngine::Warning,
10626         "The clause simdlen(1) has no effect when targeting aarch64.");
10627     CGM.getDiags().Report(SLoc, DiagID);
10628     return;
10629   }
10630 
10631   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
10632   // Advanced SIMD output.
10633   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
10634     unsigned DiagID = CGM.getDiags().getCustomDiagID(
10635         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
10636                                     "power of 2 when targeting Advanced SIMD.");
10637     CGM.getDiags().Report(SLoc, DiagID);
10638     return;
10639   }
10640 
10641   // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
10642   // limits.
10643   if (ISA == 's' && UserVLEN != 0) {
10644     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
10645       unsigned DiagID = CGM.getDiags().getCustomDiagID(
10646           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
10647                                       "lanes in the architectural constraints "
10648                                       "for SVE (min is 128-bit, max is "
10649                                       "2048-bit, by steps of 128-bit)");
10650       CGM.getDiags().Report(SLoc, DiagID) << WDS;
10651       return;
10652     }
10653   }
10654 
10655   // Sort out parameter sequence.
10656   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
10657   StringRef Prefix = "_ZGV";
10658   // Generate simdlen from user input (if any).
10659   if (UserVLEN) {
10660     if (ISA == 's') {
10661       // SVE generates only a masked function.
10662       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10663                            OutputBecomesInput, Fn);
10664     } else {
10665       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10666       // Advanced SIMD generates one or two functions, depending on
10667       // the `[not]inbranch` clause.
10668       switch (State) {
10669       case OMPDeclareSimdDeclAttr::BS_Undefined:
10670         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10671                              OutputBecomesInput, Fn);
10672         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10673                              OutputBecomesInput, Fn);
10674         break;
10675       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10676         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10677                              OutputBecomesInput, Fn);
10678         break;
10679       case OMPDeclareSimdDeclAttr::BS_Inbranch:
10680         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10681                              OutputBecomesInput, Fn);
10682         break;
10683       }
10684     }
10685   } else {
10686     // If no user simdlen is provided, follow the AAVFABI rules for
10687     // generating the vector length.
10688     if (ISA == 's') {
10689       // SVE, section 3.4.1, item 1.
10690       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
10691                            OutputBecomesInput, Fn);
10692     } else {
10693       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10694       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
10695       // two vector names depending on the use of the clause
10696       // `[not]inbranch`.
10697       switch (State) {
10698       case OMPDeclareSimdDeclAttr::BS_Undefined:
10699         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10700                                   OutputBecomesInput, Fn);
10701         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10702                                   OutputBecomesInput, Fn);
10703         break;
10704       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10705         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10706                                   OutputBecomesInput, Fn);
10707         break;
10708       case OMPDeclareSimdDeclAttr::BS_Inbranch:
10709         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10710                                   OutputBecomesInput, Fn);
10711         break;
10712       }
10713     }
10714   }
10715 }
10716 
10717 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
10718                                               llvm::Function *Fn) {
10719   ASTContext &C = CGM.getContext();
10720   FD = FD->getMostRecentDecl();
10721   // Map params to their positions in function decl.
10722   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
10723   if (isa<CXXMethodDecl>(FD))
10724     ParamPositions.try_emplace(FD, 0);
10725   unsigned ParamPos = ParamPositions.size();
10726   for (const ParmVarDecl *P : FD->parameters()) {
10727     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
10728     ++ParamPos;
10729   }
10730   while (FD) {
10731     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
10732       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
10733       // Mark uniform parameters.
10734       for (const Expr *E : Attr->uniforms()) {
10735         E = E->IgnoreParenImpCasts();
10736         unsigned Pos;
10737         if (isa<CXXThisExpr>(E)) {
10738           Pos = ParamPositions[FD];
10739         } else {
10740           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10741                                 ->getCanonicalDecl();
10742           Pos = ParamPositions[PVD];
10743         }
10744         ParamAttrs[Pos].Kind = Uniform;
10745       }
10746       // Get alignment info.
10747       auto NI = Attr->alignments_begin();
10748       for (const Expr *E : Attr->aligneds()) {
10749         E = E->IgnoreParenImpCasts();
10750         unsigned Pos;
10751         QualType ParmTy;
10752         if (isa<CXXThisExpr>(E)) {
10753           Pos = ParamPositions[FD];
10754           ParmTy = E->getType();
10755         } else {
10756           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10757                                 ->getCanonicalDecl();
10758           Pos = ParamPositions[PVD];
10759           ParmTy = PVD->getType();
10760         }
10761         ParamAttrs[Pos].Alignment =
10762             (*NI)
10763                 ? (*NI)->EvaluateKnownConstInt(C)
10764                 : llvm::APSInt::getUnsigned(
10765                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
10766                           .getQuantity());
10767         ++NI;
10768       }
10769       // Mark linear parameters.
10770       auto SI = Attr->steps_begin();
10771       auto MI = Attr->modifiers_begin();
10772       for (const Expr *E : Attr->linears()) {
10773         E = E->IgnoreParenImpCasts();
10774         unsigned Pos;
10775         if (isa<CXXThisExpr>(E)) {
10776           Pos = ParamPositions[FD];
10777         } else {
10778           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10779                                 ->getCanonicalDecl();
10780           Pos = ParamPositions[PVD];
10781         }
10782         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
10783         ParamAttr.Kind = Linear;
10784         if (*SI) {
10785           Expr::EvalResult Result;
10786           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
10787             if (const auto *DRE =
10788                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
10789               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
10790                 ParamAttr.Kind = LinearWithVarStride;
10791                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
10792                     ParamPositions[StridePVD->getCanonicalDecl()]);
10793               }
10794             }
10795           } else {
10796             ParamAttr.StrideOrArg = Result.Val.getInt();
10797           }
10798         }
10799         ++SI;
10800         ++MI;
10801       }
10802       llvm::APSInt VLENVal;
10803       SourceLocation ExprLoc;
10804       const Expr *VLENExpr = Attr->getSimdlen();
10805       if (VLENExpr) {
10806         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
10807         ExprLoc = VLENExpr->getExprLoc();
10808       }
10809       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
10810       if (CGM.getTriple().isX86()) {
10811         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
10812       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
10813         unsigned VLEN = VLENVal.getExtValue();
10814         StringRef MangledName = Fn->getName();
10815         if (CGM.getTarget().hasFeature("sve"))
10816           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10817                                          MangledName, 's', 128, Fn, ExprLoc);
10818         if (CGM.getTarget().hasFeature("neon"))
10819           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10820                                          MangledName, 'n', 128, Fn, ExprLoc);
10821       }
10822     }
10823     FD = FD->getPreviousDecl();
10824   }
10825 }
10826 
10827 namespace {
10828 /// Cleanup action for doacross support.
10829 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
10830 public:
10831   static const int DoacrossFinArgs = 2;
10832 
10833 private:
10834   llvm::FunctionCallee RTLFn;
10835   llvm::Value *Args[DoacrossFinArgs];
10836 
10837 public:
10838   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
10839                     ArrayRef<llvm::Value *> CallArgs)
10840       : RTLFn(RTLFn) {
10841     assert(CallArgs.size() == DoacrossFinArgs);
10842     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10843   }
10844   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
10845     if (!CGF.HaveInsertPoint())
10846       return;
10847     CGF.EmitRuntimeCall(RTLFn, Args);
10848   }
10849 };
10850 } // namespace
10851 
10852 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
10853                                        const OMPLoopDirective &D,
10854                                        ArrayRef<Expr *> NumIterations) {
10855   if (!CGF.HaveInsertPoint())
10856     return;
10857 
10858   ASTContext &C = CGM.getContext();
10859   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
10860   RecordDecl *RD;
10861   if (KmpDimTy.isNull()) {
10862     // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
10863     //  kmp_int64 lo; // lower
10864     //  kmp_int64 up; // upper
10865     //  kmp_int64 st; // stride
10866     // };
10867     RD = C.buildImplicitRecord("kmp_dim");
10868     RD->startDefinition();
10869     addFieldToRecordDecl(C, RD, Int64Ty);
10870     addFieldToRecordDecl(C, RD, Int64Ty);
10871     addFieldToRecordDecl(C, RD, Int64Ty);
10872     RD->completeDefinition();
10873     KmpDimTy = C.getRecordType(RD);
10874   } else {
10875     RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
10876   }
10877   llvm::APInt Size(/*numBits=*/32, NumIterations.size());
10878   QualType ArrayTy =
10879       C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);
10880 
10881   Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
10882   CGF.EmitNullInitialization(DimsAddr, ArrayTy);
10883   enum { LowerFD = 0, UpperFD, StrideFD };
10884   // Fill dims with data.
10885   for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
10886     LValue DimsLVal = CGF.MakeAddrLValue(
10887         CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
10888     // dims.upper = num_iterations;
10889     LValue UpperLVal = CGF.EmitLValueForField(
10890         DimsLVal, *std::next(RD->field_begin(), UpperFD));
10891     llvm::Value *NumIterVal =
10892         CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]),
10893                                  D.getNumIterations()->getType(), Int64Ty,
10894                                  D.getNumIterations()->getExprLoc());
10895     CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
10896     // dims.stride = 1;
10897     LValue StrideLVal = CGF.EmitLValueForField(
10898         DimsLVal, *std::next(RD->field_begin(), StrideFD));
10899     CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
10900                           StrideLVal);
10901   }
10902 
10903   // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
10904   // kmp_int32 num_dims, struct kmp_dim * dims);
10905   llvm::Value *Args[] = {
10906       emitUpdateLocation(CGF, D.getBeginLoc()),
10907       getThreadID(CGF, D.getBeginLoc()),
10908       llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
10909       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
10910           CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
10911           CGM.VoidPtrTy)};
10912 
10913   llvm::FunctionCallee RTLFn =
10914       createRuntimeFunction(OMPRTL__kmpc_doacross_init);
10915   CGF.EmitRuntimeCall(RTLFn, Args);
10916   llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
10917       emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
10918   llvm::FunctionCallee FiniRTLFn =
10919       createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
10920   CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
10921                                              llvm::makeArrayRef(FiniArgs));
10922 }
10923 
10924 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
10925                                           const OMPDependClause *C) {
10926   QualType Int64Ty =
10927       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
10928   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
10929   QualType ArrayTy = CGM.getContext().getConstantArrayType(
10930       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
10931   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
10932   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
10933     const Expr *CounterVal = C->getLoopData(I);
10934     assert(CounterVal);
10935     llvm::Value *CntVal = CGF.EmitScalarConversion(
10936         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
10937         CounterVal->getExprLoc());
10938     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
10939                           /*Volatile=*/false, Int64Ty);
10940   }
10941   llvm::Value *Args[] = {
10942       emitUpdateLocation(CGF, C->getBeginLoc()),
10943       getThreadID(CGF, C->getBeginLoc()),
10944       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
10945   llvm::FunctionCallee RTLFn;
10946   if (C->getDependencyKind() == OMPC_DEPEND_source) {
10947     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
10948   } else {
10949     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
10950     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
10951   }
10952   CGF.EmitRuntimeCall(RTLFn, Args);
10953 }
10954 
10955 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
10956                                llvm::FunctionCallee Callee,
10957                                ArrayRef<llvm::Value *> Args) const {
10958   assert(Loc.isValid() && "Outlined function call location must be valid.");
10959   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
10960 
10961   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
10962     if (Fn->doesNotThrow()) {
10963       CGF.EmitNounwindRuntimeCall(Fn, Args);
10964       return;
10965     }
10966   }
10967   CGF.EmitRuntimeCall(Callee, Args);
10968 }
10969 
10970 void CGOpenMPRuntime::emitOutlinedFunctionCall(
10971     CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
10972     ArrayRef<llvm::Value *> Args) const {
10973   emitCall(CGF, Loc, OutlinedFn, Args);
10974 }
10975 
10976 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
10977   if (const auto *FD = dyn_cast<FunctionDecl>(D))
10978     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
10979       HasEmittedDeclareTargetRegion = true;
10980 }
10981 
10982 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
10983                                              const VarDecl *NativeParam,
10984                                              const VarDecl *TargetParam) const {
10985   return CGF.GetAddrOfLocalVar(NativeParam);
10986 }
10987 
10988 namespace {
10989 /// Cleanup action for allocate support.
10990 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
10991 public:
10992   static const int CleanupArgs = 3;
10993 
10994 private:
10995   llvm::FunctionCallee RTLFn;
10996   llvm::Value *Args[CleanupArgs];
10997 
10998 public:
10999   OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
11000                        ArrayRef<llvm::Value *> CallArgs)
11001       : RTLFn(RTLFn) {
11002     assert(CallArgs.size() == CleanupArgs &&
11003            "Size of arguments does not match.");
11004     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11005   }
11006   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11007     if (!CGF.HaveInsertPoint())
11008       return;
11009     CGF.EmitRuntimeCall(RTLFn, Args);
11010   }
11011 };
11012 } // namespace
11013 
11014 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
11015                                                    const VarDecl *VD) {
11016   if (!VD)
11017     return Address::invalid();
11018   const VarDecl *CVD = VD->getCanonicalDecl();
11019   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
11020     return Address::invalid();
11021   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
11022   // Use the default allocation.
11023   if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
11024       !AA->getAllocator())
11025     return Address::invalid();
11026   llvm::Value *Size;
11027   CharUnits Align = CGM.getContext().getDeclAlign(CVD);
11028   if (CVD->getType()->isVariablyModifiedType()) {
11029     Size = CGF.getTypeSize(CVD->getType());
11030     // Align the size: ((size + align - 1) / align) * align
11031     Size = CGF.Builder.CreateNUWAdd(
11032         Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
11033     Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
11034     Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
11035   } else {
11036     CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
11037     Size = CGM.getSize(Sz.alignTo(Align));
11038   }
11039   llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
11040   assert(AA->getAllocator() &&
11041          "Expected allocator expression for non-default allocator.");
11042   llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
11043   // According to the standard, the original allocator type is a enum (integer).
11044   // Convert to pointer type, if required.
11045   if (Allocator->getType()->isIntegerTy())
11046     Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
11047   else if (Allocator->getType()->isPointerTy())
11048     Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
11049                                                                 CGM.VoidPtrTy);
11050   llvm::Value *Args[] = {ThreadID, Size, Allocator};
11051 
11052   llvm::Value *Addr =
11053       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args,
11054                           getName({CVD->getName(), ".void.addr"}));
11055   llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
11056                                                               Allocator};
11057   llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);
11058 
11059   CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11060                                                 llvm::makeArrayRef(FiniArgs));
11061   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11062       Addr,
11063       CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
11064       getName({CVD->getName(), ".addr"}));
11065   return Address(Addr, Align);
11066 }
11067 
11068 /// Finds the variant function that matches current context with its context
11069 /// selector.
11070 static const FunctionDecl *getDeclareVariantFunction(CodeGenModule &CGM,
11071                                                      const FunctionDecl *FD) {
11072   if (!FD->hasAttrs() || !FD->hasAttr<OMPDeclareVariantAttr>())
11073     return FD;
11074 
11075   SmallVector<Expr *, 8> VariantExprs;
11076   SmallVector<VariantMatchInfo, 8> VMIs;
11077   for (const auto *A : FD->specific_attrs<OMPDeclareVariantAttr>()) {
11078     const OMPTraitInfo *TI = A->getTraitInfos();
11079     if (!TI)
11080       continue;
11081     VMIs.push_back(VariantMatchInfo());
11082     TI->getAsVariantMatchInfo(CGM.getContext(), VMIs.back());
11083     VariantExprs.push_back(A->getVariantFuncRef());
11084   }
11085 
11086   OMPContext Ctx(CGM.getLangOpts().OpenMPIsDevice, CGM.getTriple());
11087   // FIXME: Keep the context in the OMPIRBuilder so we can add constructs as we
11088   //        build them.
11089 
11090   int BestMatchIdx = getBestVariantMatchForContext(VMIs, Ctx);
11091   if (BestMatchIdx < 0)
11092     return FD;
11093 
11094   return cast<FunctionDecl>(
11095       cast<DeclRefExpr>(VariantExprs[BestMatchIdx]->IgnoreParenImpCasts())
11096           ->getDecl());
11097 }
11098 
11099 bool CGOpenMPRuntime::emitDeclareVariant(GlobalDecl GD, bool IsForDefinition) {
11100   const auto *D = cast<FunctionDecl>(GD.getDecl());
11101   // If the original function is defined already, use its definition.
11102   StringRef MangledName = CGM.getMangledName(GD);
11103   llvm::GlobalValue *Orig = CGM.GetGlobalValue(MangledName);
11104   if (Orig && !Orig->isDeclaration())
11105     return false;
11106   const FunctionDecl *NewFD = getDeclareVariantFunction(CGM, D);
11107   // Emit original function if it does not have declare variant attribute or the
11108   // context does not match.
11109   if (NewFD == D)
11110     return false;
11111   GlobalDecl NewGD = GD.getWithDecl(NewFD);
11112   if (tryEmitDeclareVariant(NewGD, GD, Orig, IsForDefinition)) {
11113     DeferredVariantFunction.erase(D);
11114     return true;
11115   }
11116   DeferredVariantFunction.insert(std::make_pair(D, std::make_pair(NewGD, GD)));
11117   return true;
11118 }
11119 
11120 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11121     CodeGenModule &CGM, const OMPLoopDirective &S)
11122     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11123   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11124   if (!NeedToPush)
11125     return;
11126   NontemporalDeclsSet &DS =
11127       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11128   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11129     for (const Stmt *Ref : C->private_refs()) {
11130       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11131       const ValueDecl *VD;
11132       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11133         VD = DRE->getDecl();
11134       } else {
11135         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11136         assert((ME->isImplicitCXXThis() ||
11137                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11138                "Expected member of current class.");
11139         VD = ME->getMemberDecl();
11140       }
11141       DS.insert(VD);
11142     }
11143   }
11144 }
11145 
11146 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11147   if (!NeedToPush)
11148     return;
11149   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11150 }
11151 
11152 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11153   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11154 
11155   return llvm::any_of(
11156       CGM.getOpenMPRuntime().NontemporalDeclsStack,
11157       [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
11158 }
11159 
11160 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11161     const OMPExecutableDirective &S,
11162     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11163     const {
11164   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11165   // Vars in target/task regions must be excluded completely.
11166   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11167       isOpenMPTaskingDirective(S.getDirectiveKind())) {
11168     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11169     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11170     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11171     for (const CapturedStmt::Capture &Cap : CS->captures()) {
11172       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11173         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11174     }
11175   }
11176   // Exclude vars in private clauses.
11177   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11178     for (const Expr *Ref : C->varlists()) {
11179       if (!Ref->getType()->isScalarType())
11180         continue;
11181       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11182       if (!DRE)
11183         continue;
11184       NeedToCheckForLPCs.insert(DRE->getDecl());
11185     }
11186   }
11187   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11188     for (const Expr *Ref : C->varlists()) {
11189       if (!Ref->getType()->isScalarType())
11190         continue;
11191       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11192       if (!DRE)
11193         continue;
11194       NeedToCheckForLPCs.insert(DRE->getDecl());
11195     }
11196   }
11197   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11198     for (const Expr *Ref : C->varlists()) {
11199       if (!Ref->getType()->isScalarType())
11200         continue;
11201       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11202       if (!DRE)
11203         continue;
11204       NeedToCheckForLPCs.insert(DRE->getDecl());
11205     }
11206   }
11207   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11208     for (const Expr *Ref : C->varlists()) {
11209       if (!Ref->getType()->isScalarType())
11210         continue;
11211       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11212       if (!DRE)
11213         continue;
11214       NeedToCheckForLPCs.insert(DRE->getDecl());
11215     }
11216   }
11217   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11218     for (const Expr *Ref : C->varlists()) {
11219       if (!Ref->getType()->isScalarType())
11220         continue;
11221       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11222       if (!DRE)
11223         continue;
11224       NeedToCheckForLPCs.insert(DRE->getDecl());
11225     }
11226   }
11227   for (const Decl *VD : NeedToCheckForLPCs) {
11228     for (const LastprivateConditionalData &Data :
11229          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
11230       if (Data.DeclToUniqueName.count(VD) > 0) {
11231         if (!Data.Disabled)
11232           NeedToAddForLPCsAsDisabled.insert(VD);
11233         break;
11234       }
11235     }
11236   }
11237 }
11238 
11239 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11240     CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
11241     : CGM(CGF.CGM),
11242       Action((CGM.getLangOpts().OpenMP >= 50 &&
11243               llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
11244                            [](const OMPLastprivateClause *C) {
11245                              return C->getKind() ==
11246                                     OMPC_LASTPRIVATE_conditional;
11247                            }))
11248                  ? ActionToDo::PushAsLastprivateConditional
11249                  : ActionToDo::DoNotPush) {
11250   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11251   if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
11252     return;
11253   assert(Action == ActionToDo::PushAsLastprivateConditional &&
11254          "Expected a push action.");
11255   LastprivateConditionalData &Data =
11256       CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11257   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11258     if (C->getKind() != OMPC_LASTPRIVATE_conditional)
11259       continue;
11260 
11261     for (const Expr *Ref : C->varlists()) {
11262       Data.DeclToUniqueName.insert(std::make_pair(
11263           cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
11264           SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
11265     }
11266   }
11267   Data.IVLVal = IVLVal;
11268   Data.Fn = CGF.CurFn;
11269 }
11270 
11271 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11272     CodeGenFunction &CGF, const OMPExecutableDirective &S)
11273     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
11274   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11275   if (CGM.getLangOpts().OpenMP < 50)
11276     return;
11277   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
11278   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
11279   if (!NeedToAddForLPCsAsDisabled.empty()) {
11280     Action = ActionToDo::DisableLastprivateConditional;
11281     LastprivateConditionalData &Data =
11282         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11283     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
11284       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
11285     Data.Fn = CGF.CurFn;
11286     Data.Disabled = true;
11287   }
11288 }
11289 
11290 CGOpenMPRuntime::LastprivateConditionalRAII
11291 CGOpenMPRuntime::LastprivateConditionalRAII::disable(
11292     CodeGenFunction &CGF, const OMPExecutableDirective &S) {
11293   return LastprivateConditionalRAII(CGF, S);
11294 }
11295 
11296 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
11297   if (CGM.getLangOpts().OpenMP < 50)
11298     return;
11299   if (Action == ActionToDo::DisableLastprivateConditional) {
11300     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11301            "Expected list of disabled private vars.");
11302     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11303   }
11304   if (Action == ActionToDo::PushAsLastprivateConditional) {
11305     assert(
11306         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11307         "Expected list of lastprivate conditional vars.");
11308     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11309   }
11310 }
11311 
11312 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
11313                                                         const VarDecl *VD) {
11314   ASTContext &C = CGM.getContext();
11315   auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
11316   if (I == LastprivateConditionalToTypes.end())
11317     I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
11318   QualType NewType;
11319   const FieldDecl *VDField;
11320   const FieldDecl *FiredField;
11321   LValue BaseLVal;
11322   auto VI = I->getSecond().find(VD);
11323   if (VI == I->getSecond().end()) {
11324     RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
11325     RD->startDefinition();
11326     VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
11327     FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
11328     RD->completeDefinition();
11329     NewType = C.getRecordType(RD);
11330     Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
11331     BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
11332     I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
11333   } else {
11334     NewType = std::get<0>(VI->getSecond());
11335     VDField = std::get<1>(VI->getSecond());
11336     FiredField = std::get<2>(VI->getSecond());
11337     BaseLVal = std::get<3>(VI->getSecond());
11338   }
11339   LValue FiredLVal =
11340       CGF.EmitLValueForField(BaseLVal, FiredField);
11341   CGF.EmitStoreOfScalar(
11342       llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
11343       FiredLVal);
11344   return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
11345 }
11346 
11347 namespace {
11348 /// Checks if the lastprivate conditional variable is referenced in LHS.
11349 class LastprivateConditionalRefChecker final
11350     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
11351   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
11352   const Expr *FoundE = nullptr;
11353   const Decl *FoundD = nullptr;
11354   StringRef UniqueDeclName;
11355   LValue IVLVal;
11356   llvm::Function *FoundFn = nullptr;
11357   SourceLocation Loc;
11358 
11359 public:
11360   bool VisitDeclRefExpr(const DeclRefExpr *E) {
11361     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11362          llvm::reverse(LPM)) {
11363       auto It = D.DeclToUniqueName.find(E->getDecl());
11364       if (It == D.DeclToUniqueName.end())
11365         continue;
11366       if (D.Disabled)
11367         return false;
11368       FoundE = E;
11369       FoundD = E->getDecl()->getCanonicalDecl();
11370       UniqueDeclName = It->second;
11371       IVLVal = D.IVLVal;
11372       FoundFn = D.Fn;
11373       break;
11374     }
11375     return FoundE == E;
11376   }
11377   bool VisitMemberExpr(const MemberExpr *E) {
11378     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
11379       return false;
11380     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11381          llvm::reverse(LPM)) {
11382       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
11383       if (It == D.DeclToUniqueName.end())
11384         continue;
11385       if (D.Disabled)
11386         return false;
11387       FoundE = E;
11388       FoundD = E->getMemberDecl()->getCanonicalDecl();
11389       UniqueDeclName = It->second;
11390       IVLVal = D.IVLVal;
11391       FoundFn = D.Fn;
11392       break;
11393     }
11394     return FoundE == E;
11395   }
11396   bool VisitStmt(const Stmt *S) {
11397     for (const Stmt *Child : S->children()) {
11398       if (!Child)
11399         continue;
11400       if (const auto *E = dyn_cast<Expr>(Child))
11401         if (!E->isGLValue())
11402           continue;
11403       if (Visit(Child))
11404         return true;
11405     }
11406     return false;
11407   }
11408   explicit LastprivateConditionalRefChecker(
11409       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
11410       : LPM(LPM) {}
11411   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
11412   getFoundData() const {
11413     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
11414   }
11415 };
11416 } // namespace
11417 
11418 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
11419                                                        LValue IVLVal,
11420                                                        StringRef UniqueDeclName,
11421                                                        LValue LVal,
11422                                                        SourceLocation Loc) {
11423   // Last updated loop counter for the lastprivate conditional var.
11424   // int<xx> last_iv = 0;
11425   llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
11426   llvm::Constant *LastIV =
11427       getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
11428   cast<llvm::GlobalVariable>(LastIV)->setAlignment(
11429       IVLVal.getAlignment().getAsAlign());
11430   LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());
11431 
11432   // Last value of the lastprivate conditional.
11433   // decltype(priv_a) last_a;
11434   llvm::Constant *Last = getOrCreateInternalVariable(
11435       CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
11436   cast<llvm::GlobalVariable>(Last)->setAlignment(
11437       LVal.getAlignment().getAsAlign());
11438   LValue LastLVal =
11439       CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());
11440 
11441   // Global loop counter. Required to handle inner parallel-for regions.
11442   // iv
11443   llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
11444 
11445   // #pragma omp critical(a)
11446   // if (last_iv <= iv) {
11447   //   last_iv = iv;
11448   //   last_a = priv_a;
11449   // }
11450   auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
11451                     Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
11452     Action.Enter(CGF);
11453     llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
11454     // (last_iv <= iv) ? Check if the variable is updated and store new
11455     // value in global var.
11456     llvm::Value *CmpRes;
11457     if (IVLVal.getType()->isSignedIntegerType()) {
11458       CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
11459     } else {
11460       assert(IVLVal.getType()->isUnsignedIntegerType() &&
11461              "Loop iteration variable must be integer.");
11462       CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
11463     }
11464     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
11465     llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
11466     CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
11467     // {
11468     CGF.EmitBlock(ThenBB);
11469 
11470     //   last_iv = iv;
11471     CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
11472 
11473     //   last_a = priv_a;
11474     switch (CGF.getEvaluationKind(LVal.getType())) {
11475     case TEK_Scalar: {
11476       llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
11477       CGF.EmitStoreOfScalar(PrivVal, LastLVal);
11478       break;
11479     }
11480     case TEK_Complex: {
11481       CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
11482       CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
11483       break;
11484     }
11485     case TEK_Aggregate:
11486       llvm_unreachable(
11487           "Aggregates are not supported in lastprivate conditional.");
11488     }
11489     // }
11490     CGF.EmitBranch(ExitBB);
11491     // There is no need to emit line number for unconditional branch.
11492     (void)ApplyDebugLocation::CreateEmpty(CGF);
11493     CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
11494   };
11495 
11496   if (CGM.getLangOpts().OpenMPSimd) {
11497     // Do not emit as a critical region as no parallel region could be emitted.
11498     RegionCodeGenTy ThenRCG(CodeGen);
11499     ThenRCG(CGF);
11500   } else {
11501     emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
11502   }
11503 }
11504 
11505 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
11506                                                          const Expr *LHS) {
11507   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11508     return;
11509   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
11510   if (!Checker.Visit(LHS))
11511     return;
11512   const Expr *FoundE;
11513   const Decl *FoundD;
11514   StringRef UniqueDeclName;
11515   LValue IVLVal;
11516   llvm::Function *FoundFn;
11517   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
11518       Checker.getFoundData();
11519   if (FoundFn != CGF.CurFn) {
11520     // Special codegen for inner parallel regions.
11521     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
11522     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
11523     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
11524            "Lastprivate conditional is not found in outer region.");
11525     QualType StructTy = std::get<0>(It->getSecond());
11526     const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
11527     LValue PrivLVal = CGF.EmitLValue(FoundE);
11528     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11529         PrivLVal.getAddress(CGF),
11530         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
11531     LValue BaseLVal =
11532         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
11533     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
11534     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
11535                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
11536                         FiredLVal, llvm::AtomicOrdering::Unordered,
11537                         /*IsVolatile=*/true, /*isInit=*/false);
11538     return;
11539   }
11540 
11541   // Private address of the lastprivate conditional in the current context.
11542   // priv_a
11543   LValue LVal = CGF.EmitLValue(FoundE);
11544   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
11545                                    FoundE->getExprLoc());
11546 }
11547 
11548 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
11549     CodeGenFunction &CGF, const OMPExecutableDirective &D,
11550     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
11551   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11552     return;
11553   auto Range = llvm::reverse(LastprivateConditionalStack);
11554   auto It = llvm::find_if(
11555       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
11556   if (It == Range.end() || It->Fn != CGF.CurFn)
11557     return;
11558   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
11559   assert(LPCI != LastprivateConditionalToTypes.end() &&
11560          "Lastprivates must be registered already.");
11561   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11562   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
11563   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
11564   for (const auto &Pair : It->DeclToUniqueName) {
11565     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
11566     if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
11567       continue;
11568     auto I = LPCI->getSecond().find(Pair.first);
11569     assert(I != LPCI->getSecond().end() &&
11570            "Lastprivate must be rehistered already.");
11571     // bool Cmp = priv_a.Fired != 0;
11572     LValue BaseLVal = std::get<3>(I->getSecond());
11573     LValue FiredLVal =
11574         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
11575     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
11576     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
11577     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
11578     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
11579     // if (Cmp) {
11580     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
11581     CGF.EmitBlock(ThenBB);
11582     Address Addr = CGF.GetAddrOfLocalVar(VD);
11583     LValue LVal;
11584     if (VD->getType()->isReferenceType())
11585       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
11586                                            AlignmentSource::Decl);
11587     else
11588       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
11589                                 AlignmentSource::Decl);
11590     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
11591                                      D.getBeginLoc());
11592     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
11593     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
11594     // }
11595   }
11596 }
11597 
11598 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
11599     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
11600     SourceLocation Loc) {
11601   if (CGF.getLangOpts().OpenMP < 50)
11602     return;
11603   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
11604   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
11605          "Unknown lastprivate conditional variable.");
11606   StringRef UniqueName = It->second;
11607   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
11608   // The variable was not updated in the region - exit.
11609   if (!GV)
11610     return;
11611   LValue LPLVal = CGF.MakeAddrLValue(
11612       GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
11613   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
11614   CGF.EmitStoreOfScalar(Res, PrivLVal);
11615 }
11616 
/// Not supported in SIMD-only mode; any call unconditionally reaches
/// llvm_unreachable, so no value is ever returned.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11622 
/// Not supported in SIMD-only mode; any call unconditionally reaches
/// llvm_unreachable, so no value is ever returned.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11628 
/// Not supported in SIMD-only mode; any call unconditionally reaches
/// llvm_unreachable, so no value is returned and \p NumberOfParts is not
/// written.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11636 
/// Not supported in SIMD-only mode; any call unconditionally reaches
/// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11644 
/// Not supported in SIMD-only mode; any call unconditionally reaches
/// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11651 
/// Not supported in SIMD-only mode; any call unconditionally reaches
/// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11657 
/// Not supported in SIMD-only mode; any call unconditionally reaches
/// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11662 
/// Not supported in SIMD-only mode; any call unconditionally reaches
/// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11668 
/// Not supported in SIMD-only mode; any call unconditionally reaches
/// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11676 
/// Not supported in SIMD-only mode; any call unconditionally reaches
/// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11683 
/// Not supported in SIMD-only mode; any call unconditionally reaches
/// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11691 
/// Not supported in SIMD-only mode; any call unconditionally reaches
/// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11698 
/// Not supported in SIMD-only mode; any call unconditionally reaches
/// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11704 
/// Not supported in SIMD-only mode; any call unconditionally reaches
/// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11710 
/// Not supported in SIMD-only mode; any call unconditionally reaches
/// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11717 
/// Not supported in SIMD-only mode; any call unconditionally reaches
/// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11723 
/// Not supported in SIMD-only mode; any call unconditionally reaches
/// llvm_unreachable, so no value is ever returned.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11731 
/// Not supported in SIMD-only mode; any call unconditionally reaches
/// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11737 
/// Not supported in SIMD-only mode; any call unconditionally reaches
/// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11743 
/// Not supported in SIMD-only mode; any call unconditionally reaches
/// llvm_unreachable, so no address is ever returned.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11750 
/// Not supported in SIMD-only mode; any call unconditionally reaches
/// llvm_unreachable, so no value is ever returned.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11756 
/// Not supported in SIMD-only mode; any call unconditionally reaches
/// llvm_unreachable, so no address is ever returned.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11761 
/// Not supported in SIMD-only mode; any call unconditionally reaches
/// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11768 
/// Not supported in SIMD-only mode; any call unconditionally reaches
/// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11777 
/// Not supported in SIMD-only mode; any call unconditionally reaches
/// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11784 
/// Emits a reduction by forwarding every argument unchanged to
/// CGOpenMPRuntime::emitReduction.
///
/// Asserts that \p Options.SimpleReduction is set before forwarding.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
11793 
/// Not supported in SIMD-only mode; any call unconditionally reaches
/// llvm_unreachable, so no value is ever returned.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11799 
/// Not supported in SIMD-only mode; any call unconditionally reaches
/// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11806 
/// Not supported in SIMD-only mode; any call unconditionally reaches
/// llvm_unreachable, so no address is ever returned.
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11813 
/// Not supported in SIMD-only mode; any call unconditionally reaches
/// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11818 
/// Not supported in SIMD-only mode; any call unconditionally reaches
/// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11824 
/// Not supported in SIMD-only mode; any call unconditionally reaches
/// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11830 
/// Not supported in SIMD-only mode; any call unconditionally reaches
/// llvm_unreachable, so \p OutlinedFn and \p OutlinedFnID are not written.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11837 
/// Not supported in SIMD-only mode; any call unconditionally reaches
/// llvm_unreachable, so \p SizeEmitter is never invoked.
void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    const Expr *Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11847 
/// Not supported in SIMD-only mode; any call unconditionally reaches
/// llvm_unreachable, so no value is ever returned.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11851 
/// Not supported in SIMD-only mode; any call unconditionally reaches
/// llvm_unreachable, so no value is ever returned.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11855 
/// Always returns false without inspecting \p GD. Unlike the neighbouring
/// emitTarget* members of this class, calling it is not treated as
/// unreachable.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
11859 
/// Not supported in SIMD-only mode; any call unconditionally reaches
/// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11867 
/// Not supported in SIMD-only mode; any call unconditionally reaches
/// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11874 
/// Not supported in SIMD-only mode; any call unconditionally reaches
/// llvm_unreachable, so \p Info is not modified.
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11880 
/// Not supported in SIMD-only mode; any call unconditionally reaches
/// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11886 
/// Not supported in SIMD-only mode; any call unconditionally reaches
/// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11892 
/// Not supported in SIMD-only mode; any call unconditionally reaches
/// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11897 
/// Not supported in SIMD-only mode; any call unconditionally reaches
/// llvm_unreachable, so no declaration is ever returned.
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11903 
/// Not supported in SIMD-only mode; any call unconditionally reaches
/// llvm_unreachable, so no address is ever returned.
Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11910