1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/Attr.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/OpenMPClause.h"
21 #include "clang/AST/StmtOpenMP.h"
22 #include "clang/AST/StmtVisitor.h"
23 #include "clang/Basic/BitmaskEnum.h"
24 #include "clang/CodeGen/ConstantInitBuilder.h"
25 #include "llvm/ADT/ArrayRef.h"
26 #include "llvm/ADT/SetOperations.h"
27 #include "llvm/ADT/StringExtras.h"
28 #include "llvm/Bitcode/BitcodeReader.h"
29 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
30 #include "llvm/IR/DerivedTypes.h"
31 #include "llvm/IR/GlobalValue.h"
32 #include "llvm/IR/Value.h"
33 #include "llvm/Support/Format.h"
34 #include "llvm/Support/raw_ostream.h"
35 #include <cassert>
36 
37 using namespace clang;
38 using namespace CodeGen;
39 using namespace llvm::omp;
40 
41 namespace {
42 /// Base class for handling code generation inside OpenMP regions.
43 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
44 public:
45   /// Kinds of OpenMP regions used in codegen.
46   enum CGOpenMPRegionKind {
47     /// Region with outlined function for standalone 'parallel'
48     /// directive.
49     ParallelOutlinedRegion,
50     /// Region with outlined function for standalone 'task' directive.
51     TaskOutlinedRegion,
52     /// Region for constructs that do not require function outlining,
53     /// like 'for', 'sections', 'atomic' etc. directives.
54     InlinedRegion,
55     /// Region with outlined function for standalone 'target' directive.
56     TargetRegion,
57   };
58 
59   CGOpenMPRegionInfo(const CapturedStmt &CS,
60                      const CGOpenMPRegionKind RegionKind,
61                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
62                      bool HasCancel)
63       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
64         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
65 
66   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
67                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
68                      bool HasCancel)
69       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
70         Kind(Kind), HasCancel(HasCancel) {}
71 
72   /// Get a variable or parameter for storing global thread id
73   /// inside OpenMP construct.
74   virtual const VarDecl *getThreadIDVariable() const = 0;
75 
76   /// Emit the captured statement body.
77   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
78 
79   /// Get an LValue for the current ThreadID variable.
80   /// \return LValue for thread id variable. This LValue always has type int32*.
81   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
82 
83   virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
84 
85   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
86 
87   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
88 
89   bool hasCancel() const { return HasCancel; }
90 
91   static bool classof(const CGCapturedStmtInfo *Info) {
92     return Info->getKind() == CR_OpenMP;
93   }
94 
95   ~CGOpenMPRegionInfo() override = default;
96 
97 protected:
98   CGOpenMPRegionKind RegionKind;
99   RegionCodeGenTy CodeGen;
100   OpenMPDirectiveKind Kind;
101   bool HasCancel;
102 };
103 
104 /// API for captured statement code generation in OpenMP constructs.
105 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
106 public:
107   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
108                              const RegionCodeGenTy &CodeGen,
109                              OpenMPDirectiveKind Kind, bool HasCancel,
110                              StringRef HelperName)
111       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
112                            HasCancel),
113         ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
114     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
115   }
116 
117   /// Get a variable or parameter for storing global thread id
118   /// inside OpenMP construct.
119   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
120 
121   /// Get the name of the capture helper.
122   StringRef getHelperName() const override { return HelperName; }
123 
124   static bool classof(const CGCapturedStmtInfo *Info) {
125     return CGOpenMPRegionInfo::classof(Info) &&
126            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
127                ParallelOutlinedRegion;
128   }
129 
130 private:
131   /// A variable or parameter storing global thread id for OpenMP
132   /// constructs.
133   const VarDecl *ThreadIDVar;
134   StringRef HelperName;
135 };
136 
137 /// API for captured statement code generation in OpenMP constructs.
138 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
139 public:
140   class UntiedTaskActionTy final : public PrePostActionTy {
141     bool Untied;
142     const VarDecl *PartIDVar;
143     const RegionCodeGenTy UntiedCodeGen;
144     llvm::SwitchInst *UntiedSwitch = nullptr;
145 
146   public:
147     UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
148                        const RegionCodeGenTy &UntiedCodeGen)
149         : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
150     void Enter(CodeGenFunction &CGF) override {
151       if (Untied) {
152         // Emit task switching point.
153         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
154             CGF.GetAddrOfLocalVar(PartIDVar),
155             PartIDVar->getType()->castAs<PointerType>());
156         llvm::Value *Res =
157             CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
158         llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
159         UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
160         CGF.EmitBlock(DoneBB);
161         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
162         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
163         UntiedSwitch->addCase(CGF.Builder.getInt32(0),
164                               CGF.Builder.GetInsertBlock());
165         emitUntiedSwitch(CGF);
166       }
167     }
168     void emitUntiedSwitch(CodeGenFunction &CGF) const {
169       if (Untied) {
170         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
171             CGF.GetAddrOfLocalVar(PartIDVar),
172             PartIDVar->getType()->castAs<PointerType>());
173         CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
174                               PartIdLVal);
175         UntiedCodeGen(CGF);
176         CodeGenFunction::JumpDest CurPoint =
177             CGF.getJumpDestInCurrentScope(".untied.next.");
178         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
179         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
180         UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
181                               CGF.Builder.GetInsertBlock());
182         CGF.EmitBranchThroughCleanup(CurPoint);
183         CGF.EmitBlock(CurPoint.getBlock());
184       }
185     }
186     unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
187   };
188   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
189                                  const VarDecl *ThreadIDVar,
190                                  const RegionCodeGenTy &CodeGen,
191                                  OpenMPDirectiveKind Kind, bool HasCancel,
192                                  const UntiedTaskActionTy &Action)
193       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
194         ThreadIDVar(ThreadIDVar), Action(Action) {
195     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
196   }
197 
198   /// Get a variable or parameter for storing global thread id
199   /// inside OpenMP construct.
200   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
201 
202   /// Get an LValue for the current ThreadID variable.
203   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
204 
205   /// Get the name of the capture helper.
206   StringRef getHelperName() const override { return ".omp_outlined."; }
207 
208   void emitUntiedSwitch(CodeGenFunction &CGF) override {
209     Action.emitUntiedSwitch(CGF);
210   }
211 
212   static bool classof(const CGCapturedStmtInfo *Info) {
213     return CGOpenMPRegionInfo::classof(Info) &&
214            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
215                TaskOutlinedRegion;
216   }
217 
218 private:
219   /// A variable or parameter storing global thread id for OpenMP
220   /// constructs.
221   const VarDecl *ThreadIDVar;
222   /// Action for emitting code for untied tasks.
223   const UntiedTaskActionTy &Action;
224 };
225 
226 /// API for inlined captured statement code generation in OpenMP
227 /// constructs.
228 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
229 public:
230   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
231                             const RegionCodeGenTy &CodeGen,
232                             OpenMPDirectiveKind Kind, bool HasCancel)
233       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
234         OldCSI(OldCSI),
235         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
236 
237   // Retrieve the value of the context parameter.
238   llvm::Value *getContextValue() const override {
239     if (OuterRegionInfo)
240       return OuterRegionInfo->getContextValue();
241     llvm_unreachable("No context value for inlined OpenMP region");
242   }
243 
244   void setContextValue(llvm::Value *V) override {
245     if (OuterRegionInfo) {
246       OuterRegionInfo->setContextValue(V);
247       return;
248     }
249     llvm_unreachable("No context value for inlined OpenMP region");
250   }
251 
252   /// Lookup the captured field decl for a variable.
253   const FieldDecl *lookup(const VarDecl *VD) const override {
254     if (OuterRegionInfo)
255       return OuterRegionInfo->lookup(VD);
256     // If there is no outer outlined region,no need to lookup in a list of
257     // captured variables, we can use the original one.
258     return nullptr;
259   }
260 
261   FieldDecl *getThisFieldDecl() const override {
262     if (OuterRegionInfo)
263       return OuterRegionInfo->getThisFieldDecl();
264     return nullptr;
265   }
266 
267   /// Get a variable or parameter for storing global thread id
268   /// inside OpenMP construct.
269   const VarDecl *getThreadIDVariable() const override {
270     if (OuterRegionInfo)
271       return OuterRegionInfo->getThreadIDVariable();
272     return nullptr;
273   }
274 
275   /// Get an LValue for the current ThreadID variable.
276   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
277     if (OuterRegionInfo)
278       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
279     llvm_unreachable("No LValue for inlined OpenMP construct");
280   }
281 
282   /// Get the name of the capture helper.
283   StringRef getHelperName() const override {
284     if (auto *OuterRegionInfo = getOldCSI())
285       return OuterRegionInfo->getHelperName();
286     llvm_unreachable("No helper name for inlined OpenMP construct");
287   }
288 
289   void emitUntiedSwitch(CodeGenFunction &CGF) override {
290     if (OuterRegionInfo)
291       OuterRegionInfo->emitUntiedSwitch(CGF);
292   }
293 
294   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
295 
296   static bool classof(const CGCapturedStmtInfo *Info) {
297     return CGOpenMPRegionInfo::classof(Info) &&
298            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
299   }
300 
301   ~CGOpenMPInlinedRegionInfo() override = default;
302 
303 private:
304   /// CodeGen info about outer OpenMP region.
305   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
306   CGOpenMPRegionInfo *OuterRegionInfo;
307 };
308 
309 /// API for captured statement code generation in OpenMP target
310 /// constructs. For this captures, implicit parameters are used instead of the
311 /// captured fields. The name of the target region has to be unique in a given
312 /// application so it is provided by the client, because only the client has
313 /// the information to generate that.
314 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
315 public:
316   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
317                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
318       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
319                            /*HasCancel=*/false),
320         HelperName(HelperName) {}
321 
322   /// This is unused for target regions because each starts executing
323   /// with a single thread.
324   const VarDecl *getThreadIDVariable() const override { return nullptr; }
325 
326   /// Get the name of the capture helper.
327   StringRef getHelperName() const override { return HelperName; }
328 
329   static bool classof(const CGCapturedStmtInfo *Info) {
330     return CGOpenMPRegionInfo::classof(Info) &&
331            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
332   }
333 
334 private:
335   StringRef HelperName;
336 };
337 
338 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
339   llvm_unreachable("No codegen for expressions");
340 }
341 /// API for generation of expressions captured in a innermost OpenMP
342 /// region.
343 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
344 public:
345   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
346       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
347                                   OMPD_unknown,
348                                   /*HasCancel=*/false),
349         PrivScope(CGF) {
350     // Make sure the globals captured in the provided statement are local by
351     // using the privatization logic. We assume the same variable is not
352     // captured more than once.
353     for (const auto &C : CS.captures()) {
354       if (!C.capturesVariable() && !C.capturesVariableByCopy())
355         continue;
356 
357       const VarDecl *VD = C.getCapturedVar();
358       if (VD->isLocalVarDeclOrParm())
359         continue;
360 
361       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
362                       /*RefersToEnclosingVariableOrCapture=*/false,
363                       VD->getType().getNonReferenceType(), VK_LValue,
364                       C.getLocation());
365       PrivScope.addPrivate(
366           VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
367     }
368     (void)PrivScope.Privatize();
369   }
370 
371   /// Lookup the captured field decl for a variable.
372   const FieldDecl *lookup(const VarDecl *VD) const override {
373     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
374       return FD;
375     return nullptr;
376   }
377 
378   /// Emit the captured statement body.
379   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
380     llvm_unreachable("No body for expressions");
381   }
382 
383   /// Get a variable or parameter for storing global thread id
384   /// inside OpenMP construct.
385   const VarDecl *getThreadIDVariable() const override {
386     llvm_unreachable("No thread id for expressions");
387   }
388 
389   /// Get the name of the capture helper.
390   StringRef getHelperName() const override {
391     llvm_unreachable("No helper name for expressions");
392   }
393 
394   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
395 
396 private:
397   /// Private scope to capture global variables.
398   CodeGenFunction::OMPPrivateScope PrivScope;
399 };
400 
401 /// RAII for emitting code of OpenMP constructs.
402 class InlinedOpenMPRegionRAII {
403   CodeGenFunction &CGF;
404   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
405   FieldDecl *LambdaThisCaptureField = nullptr;
406   const CodeGen::CGBlockInfo *BlockInfo = nullptr;
407 
408 public:
409   /// Constructs region for combined constructs.
410   /// \param CodeGen Code generation sequence for combined directives. Includes
411   /// a list of functions used for code generation of implicitly inlined
412   /// regions.
413   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
414                           OpenMPDirectiveKind Kind, bool HasCancel)
415       : CGF(CGF) {
416     // Start emission for the construct.
417     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
418         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
419     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
420     LambdaThisCaptureField = CGF.LambdaThisCaptureField;
421     CGF.LambdaThisCaptureField = nullptr;
422     BlockInfo = CGF.BlockInfo;
423     CGF.BlockInfo = nullptr;
424   }
425 
426   ~InlinedOpenMPRegionRAII() {
427     // Restore original CapturedStmtInfo only if we're done with code emission.
428     auto *OldCSI =
429         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
430     delete CGF.CapturedStmtInfo;
431     CGF.CapturedStmtInfo = OldCSI;
432     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
433     CGF.LambdaThisCaptureField = LambdaThisCaptureField;
434     CGF.BlockInfo = BlockInfo;
435   }
436 };
437 
438 /// Values for bit flags used in the ident_t to describe the fields.
439 /// All enumeric elements are named and described in accordance with the code
440 /// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
441 enum OpenMPLocationFlags : unsigned {
442   /// Use trampoline for internal microtask.
443   OMP_IDENT_IMD = 0x01,
444   /// Use c-style ident structure.
445   OMP_IDENT_KMPC = 0x02,
446   /// Atomic reduction option for kmpc_reduce.
447   OMP_ATOMIC_REDUCE = 0x10,
448   /// Explicit 'barrier' directive.
449   OMP_IDENT_BARRIER_EXPL = 0x20,
450   /// Implicit barrier in code.
451   OMP_IDENT_BARRIER_IMPL = 0x40,
452   /// Implicit barrier in 'for' directive.
453   OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
454   /// Implicit barrier in 'sections' directive.
455   OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
456   /// Implicit barrier in 'single' directive.
457   OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
458   /// Call of __kmp_for_static_init for static loop.
459   OMP_IDENT_WORK_LOOP = 0x200,
460   /// Call of __kmp_for_static_init for sections.
461   OMP_IDENT_WORK_SECTIONS = 0x400,
462   /// Call of __kmp_for_static_init for distribute.
463   OMP_IDENT_WORK_DISTRIBUTE = 0x800,
464   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
465 };
466 
467 namespace {
468 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
469 /// Values for bit flags for marking which requires clauses have been used.
470 enum OpenMPOffloadingRequiresDirFlags : int64_t {
471   /// flag undefined.
472   OMP_REQ_UNDEFINED               = 0x000,
473   /// no requires clause present.
474   OMP_REQ_NONE                    = 0x001,
475   /// reverse_offload clause.
476   OMP_REQ_REVERSE_OFFLOAD         = 0x002,
477   /// unified_address clause.
478   OMP_REQ_UNIFIED_ADDRESS         = 0x004,
479   /// unified_shared_memory clause.
480   OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
481   /// dynamic_allocators clause.
482   OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
483   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
484 };
485 
486 enum OpenMPOffloadingReservedDeviceIDs {
487   /// Device ID if the device was not defined, runtime should get it
488   /// from environment variables in the spec.
489   OMP_DEVICEID_UNDEF = -1,
490 };
491 } // anonymous namespace
492 
/// Indices of the fields of the ident structure that describes a source
/// location. All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// reserved_1: might be used in Fortran.
  IdentField_Reserved_1 = 0,
  /// flags: OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags = 1,
  /// reserved_2: not really used in Fortran any more.
  IdentField_Reserved_2 = 2,
  /// reserved_3: source[4] in Fortran, do not use for C++.
  IdentField_Reserved_3 = 3,
  /// psource: string describing the source location. It is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource = 4
};
533 
/// Schedule kinds passed to the runtime for 'omp for' loops. The enumerators
/// are taken from the enum sched_type in kmp.h.
enum OpenMPSchedType {
  // Default (unordered) schedules.
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// Static with chunk adjustment (e.g., simd).
  OMP_sch_static_balanced_chunked = 45,
  /// Schedule used by default: plain static.
  OMP_sch_default = OMP_sch_static,

  // 'ordered' schedules.
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,

  // dist_schedule types.
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,

  // Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = 1 << 29,
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = 1 << 30,
};
565 
566 enum OpenMPRTLFunction {
567   /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
568   /// kmpc_micro microtask, ...);
569   OMPRTL__kmpc_fork_call,
570   /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
571   /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
572   OMPRTL__kmpc_threadprivate_cached,
573   /// Call to void __kmpc_threadprivate_register( ident_t *,
574   /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
575   OMPRTL__kmpc_threadprivate_register,
  // Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
577   OMPRTL__kmpc_global_thread_num,
578   // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
579   // kmp_critical_name *crit);
580   OMPRTL__kmpc_critical,
581   // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
582   // global_tid, kmp_critical_name *crit, uintptr_t hint);
583   OMPRTL__kmpc_critical_with_hint,
584   // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
585   // kmp_critical_name *crit);
586   OMPRTL__kmpc_end_critical,
587   // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
588   // global_tid);
589   OMPRTL__kmpc_cancel_barrier,
590   // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
591   OMPRTL__kmpc_barrier,
592   // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
593   OMPRTL__kmpc_for_static_fini,
594   // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
595   // global_tid);
596   OMPRTL__kmpc_serialized_parallel,
597   // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
598   // global_tid);
599   OMPRTL__kmpc_end_serialized_parallel,
600   // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
601   // kmp_int32 num_threads);
602   OMPRTL__kmpc_push_num_threads,
603   // Call to void __kmpc_flush(ident_t *loc);
604   OMPRTL__kmpc_flush,
605   // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
606   OMPRTL__kmpc_master,
607   // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
608   OMPRTL__kmpc_end_master,
609   // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
610   // int end_part);
611   OMPRTL__kmpc_omp_taskyield,
612   // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
613   OMPRTL__kmpc_single,
614   // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
615   OMPRTL__kmpc_end_single,
616   // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
617   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
618   // kmp_routine_entry_t *task_entry);
619   OMPRTL__kmpc_omp_task_alloc,
620   // Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *,
621   // kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
622   // size_t sizeof_shareds, kmp_routine_entry_t *task_entry,
623   // kmp_int64 device_id);
624   OMPRTL__kmpc_omp_target_task_alloc,
625   // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
626   // new_task);
627   OMPRTL__kmpc_omp_task,
628   // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
629   // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
630   // kmp_int32 didit);
631   OMPRTL__kmpc_copyprivate,
632   // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
633   // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
634   // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
635   OMPRTL__kmpc_reduce,
636   // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
637   // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
638   // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
639   // *lck);
640   OMPRTL__kmpc_reduce_nowait,
641   // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
642   // kmp_critical_name *lck);
643   OMPRTL__kmpc_end_reduce,
644   // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
645   // kmp_critical_name *lck);
646   OMPRTL__kmpc_end_reduce_nowait,
647   // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
648   // kmp_task_t * new_task);
649   OMPRTL__kmpc_omp_task_begin_if0,
650   // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
651   // kmp_task_t * new_task);
652   OMPRTL__kmpc_omp_task_complete_if0,
653   // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
654   OMPRTL__kmpc_ordered,
655   // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
656   OMPRTL__kmpc_end_ordered,
657   // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
658   // global_tid);
659   OMPRTL__kmpc_omp_taskwait,
660   // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
661   OMPRTL__kmpc_taskgroup,
662   // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
663   OMPRTL__kmpc_end_taskgroup,
664   // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
665   // int proc_bind);
666   OMPRTL__kmpc_push_proc_bind,
667   // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
668   // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
669   // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
670   OMPRTL__kmpc_omp_task_with_deps,
671   // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
672   // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
673   // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
674   OMPRTL__kmpc_omp_wait_deps,
675   // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
676   // global_tid, kmp_int32 cncl_kind);
677   OMPRTL__kmpc_cancellationpoint,
678   // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
679   // kmp_int32 cncl_kind);
680   OMPRTL__kmpc_cancel,
681   // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
682   // kmp_int32 num_teams, kmp_int32 thread_limit);
683   OMPRTL__kmpc_push_num_teams,
684   // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
685   // microtask, ...);
686   OMPRTL__kmpc_fork_teams,
687   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
688   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
689   // sched, kmp_uint64 grainsize, void *task_dup);
690   OMPRTL__kmpc_taskloop,
691   // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
692   // num_dims, struct kmp_dim *dims);
693   OMPRTL__kmpc_doacross_init,
694   // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
695   OMPRTL__kmpc_doacross_fini,
696   // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
697   // *vec);
698   OMPRTL__kmpc_doacross_post,
699   // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
700   // *vec);
701   OMPRTL__kmpc_doacross_wait,
702   // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
703   // *data);
704   OMPRTL__kmpc_task_reduction_init,
705   // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
706   // *d);
707   OMPRTL__kmpc_task_reduction_get_th_data,
708   // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
709   OMPRTL__kmpc_alloc,
710   // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
711   OMPRTL__kmpc_free,
712 
713   //
714   // Offloading related calls
715   //
716   // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
717   // size);
718   OMPRTL__kmpc_push_target_tripcount,
719   // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
720   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
721   // *arg_types);
722   OMPRTL__tgt_target,
723   // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
724   // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
725   // *arg_types);
726   OMPRTL__tgt_target_nowait,
727   // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
728   // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
729   // *arg_types, int32_t num_teams, int32_t thread_limit);
730   OMPRTL__tgt_target_teams,
731   // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
732   // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
733   // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
734   OMPRTL__tgt_target_teams_nowait,
735   // Call to void __tgt_register_requires(int64_t flags);
736   OMPRTL__tgt_register_requires,
737   // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
738   // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
739   OMPRTL__tgt_target_data_begin,
740   // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
741   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
742   // *arg_types);
743   OMPRTL__tgt_target_data_begin_nowait,
744   // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
745   // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
746   OMPRTL__tgt_target_data_end,
747   // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
748   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
749   // *arg_types);
750   OMPRTL__tgt_target_data_end_nowait,
751   // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
752   // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
753   OMPRTL__tgt_target_data_update,
754   // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
755   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
756   // *arg_types);
757   OMPRTL__tgt_target_data_update_nowait,
758   // Call to int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
759   OMPRTL__tgt_mapper_num_components,
760   // Call to void __tgt_push_mapper_component(void *rt_mapper_handle, void
761   // *base, void *begin, int64_t size, int64_t type);
762   OMPRTL__tgt_push_mapper_component,
763 };
764 
765 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
766 /// region.
767 class CleanupTy final : public EHScopeStack::Cleanup {
768   PrePostActionTy *Action;
769 
770 public:
771   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
772   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
773     if (!CGF.HaveInsertPoint())
774       return;
775     Action->Exit(CGF);
776   }
777 };
778 
779 } // anonymous namespace
780 
781 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
782   CodeGenFunction::RunCleanupsScope Scope(CGF);
783   if (PrePostAction) {
784     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
785     Callback(CodeGen, CGF, *PrePostAction);
786   } else {
787     PrePostActionTy Action;
788     Callback(CodeGen, CGF, Action);
789   }
790 }
791 
792 /// Check if the combiner is a call to UDR combiner and if it is so return the
793 /// UDR decl used for reduction.
794 static const OMPDeclareReductionDecl *
795 getReductionInit(const Expr *ReductionOp) {
796   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
797     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
798       if (const auto *DRE =
799               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
800         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
801           return DRD;
802   return nullptr;
803 }
804 
/// Emit the initialization of one private reduction item.
///
/// \param CGF Function in which the initialization is emitted.
/// \param DRD User-defined reduction declaration driving the initialization.
/// \param InitOp Call expression emitted when \p DRD has an initializer. Its
/// callee is an opaque value and its two arguments are unary operators applied
/// to declaration references.
/// \param Private Address of the private copy being initialized.
/// \param Original Address of the original item.
/// \param Ty Type of the item being initialized.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // The reduction has its own initializer: emit InitOp as a call to the
    // initializer function (second element of the combiner/initializer pair).
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Remap the variable referenced by the first argument to the private copy
    // and the one referenced by the second argument to the original item.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // Bind the opaque callee to the initializer function for the duration of
    // the call emission.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No initializer: materialize the null constant of Ty in a private
    // constant global and copy it into the private item.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    // Build the rvalue to store according to the evaluation kind of Ty.
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress(CGF));
      break;
    }
    // Route the value through a temporary opaque expression so that the
    // generic expression-to-memory emission can be reused.
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
856 
/// Emit initialization of arrays of complex types.
/// \param CGF Function in which the initialization loop is emitted.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, every element is initialized via
/// emitInitWithReductionInitializer(); otherwise \p Init is emitted directly
/// into every element.
/// \param Init Initial expression of array.
/// \param DRD User-defined reduction declaration; may be null. When non-null,
/// the original array is walked in lockstep with the destination array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  // The source address is only meaningful when a reduction declaration is
  // given; give it the same element type as the destination.
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // One-past-the-end pointer of the destination array.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a do-while loop guarded by an up-front
  // emptiness check.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI tracking the current source element (only with a reduction decl).
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  // PHI tracking the current destination element.
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit the initialization of the current element; cleanups it creates are
  // run at the end of this scope, i.e. once per iteration.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the source address forward by one element. Note that the IR value
    // name used here is the same one used for the destination below.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the destination address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
945 
946 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
947   return CGF.EmitOMPSharedLValue(E);
948 }
949 
950 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
951                                             const Expr *E) {
952   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
953     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
954   return LValue();
955 }
956 
957 void ReductionCodeGen::emitAggregateInitialization(
958     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
959     const OMPDeclareReductionDecl *DRD) {
960   // Emit VarDecl with copy init for arrays.
961   // Get the address of the original variable captured in current
962   // captured region.
963   const auto *PrivateVD =
964       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
965   bool EmitDeclareReductionInit =
966       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
967   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
968                        EmitDeclareReductionInit,
969                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
970                                                 : PrivateVD->getInit(),
971                        DRD, SharedLVal.getAddress(CGF));
972 }
973 
974 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
975                                    ArrayRef<const Expr *> Privates,
976                                    ArrayRef<const Expr *> ReductionOps) {
977   ClausesData.reserve(Shareds.size());
978   SharedAddresses.reserve(Shareds.size());
979   Sizes.reserve(Shareds.size());
980   BaseDecls.reserve(Shareds.size());
981   auto IPriv = Privates.begin();
982   auto IRed = ReductionOps.begin();
983   for (const Expr *Ref : Shareds) {
984     ClausesData.emplace_back(Ref, *IPriv, *IRed);
985     std::advance(IPriv, 1);
986     std::advance(IRed, 1);
987   }
988 }
989 
990 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
991   assert(SharedAddresses.size() == N &&
992          "Number of generated lvalues must be exactly N.");
993   LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
994   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
995   SharedAddresses.emplace_back(First, Second);
996 }
997 
/// Compute and record the size of reduction item \p N.
///
/// If the private type is not variably modified, only the size in chars is
/// recorded (with a null element count). Otherwise both the size in chars and
/// the number of elements are recorded, the element count is bound to the size
/// expression of the private variable array type, and the variably modified
/// type is emitted.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Fixed-size item: the size comes straight from the shared item's type.
    Sizes.emplace_back(
        CGF.getTypeSize(
            SharedAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  // Size of the pointee type of the shared item's pointer.
  auto *ElemType = cast<llvm::PointerType>(
                       SharedAddresses[N].first.getPointer(CGF)->getType())
                       ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Array section: element count is (upper bound - lower bound) + 1, and
    // the size in chars is that count times the element size.
    Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(CGF),
                                     SharedAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Otherwise take the size in chars from the type and derive the element
    // count by dividing by the element size.
    SizeInChars = CGF.getTypeSize(
        SharedAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the computed element count to the opaque size expression of the
  // private array type while the variably modified type is emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
1035 
1036 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
1037                                          llvm::Value *Size) {
1038   const auto *PrivateVD =
1039       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1040   QualType PrivateType = PrivateVD->getType();
1041   if (!PrivateType->isVariablyModifiedType()) {
1042     assert(!Size && !Sizes[N].second &&
1043            "Size should be nullptr for non-variably modified reduction "
1044            "items.");
1045     return;
1046   }
1047   CodeGenFunction::OpaqueValueMapping OpaqueMap(
1048       CGF,
1049       cast<OpaqueValueExpr>(
1050           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
1051       RValue::get(Size));
1052   CGF.EmitVariablyModifiedType(PrivateType);
1053 }
1054 
1055 void ReductionCodeGen::emitInitialization(
1056     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
1057     llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
1058   assert(SharedAddresses.size() > N && "No variable was generated");
1059   const auto *PrivateVD =
1060       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1061   const OMPDeclareReductionDecl *DRD =
1062       getReductionInit(ClausesData[N].ReductionOp);
1063   QualType PrivateType = PrivateVD->getType();
1064   PrivateAddr = CGF.Builder.CreateElementBitCast(
1065       PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1066   QualType SharedType = SharedAddresses[N].first.getType();
1067   SharedLVal = CGF.MakeAddrLValue(
1068       CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
1069                                        CGF.ConvertTypeForMem(SharedType)),
1070       SharedType, SharedAddresses[N].first.getBaseInfo(),
1071       CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
1072   if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
1073     emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
1074   } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
1075     emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
1076                                      PrivateAddr, SharedLVal.getAddress(CGF),
1077                                      SharedLVal.getType());
1078   } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
1079              !CGF.isTrivialInitializer(PrivateVD->getInit())) {
1080     CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
1081                          PrivateVD->getType().getQualifiers(),
1082                          /*IsInitializer=*/false);
1083   }
1084 }
1085 
1086 bool ReductionCodeGen::needCleanups(unsigned N) {
1087   const auto *PrivateVD =
1088       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1089   QualType PrivateType = PrivateVD->getType();
1090   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1091   return DTorKind != QualType::DK_none;
1092 }
1093 
1094 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
1095                                     Address PrivateAddr) {
1096   const auto *PrivateVD =
1097       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1098   QualType PrivateType = PrivateVD->getType();
1099   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1100   if (needCleanups(N)) {
1101     PrivateAddr = CGF.Builder.CreateElementBitCast(
1102         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1103     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
1104   }
1105 }
1106 
1107 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
1108                           LValue BaseLV) {
1109   BaseTy = BaseTy.getNonReferenceType();
1110   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1111          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
1112     if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
1113       BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
1114     } else {
1115       LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
1116       BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
1117     }
1118     BaseTy = BaseTy->getPointeeType();
1119   }
1120   return CGF.MakeAddrLValue(
1121       CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
1122                                        CGF.ConvertTypeForMem(ElTy)),
1123       BaseLV.getType(), BaseLV.getBaseInfo(),
1124       CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
1125 }
1126 
/// Wrap \p Addr in the same chain of indirections that \p BaseTy has on top
/// of \p ElTy.
///
/// For every pointer/reference level of \p BaseTy that does not yet match
/// \p ElTy a memory temporary is created; each temporary stores the pointer to
/// the next one and the innermost one stores \p Addr (cast to that temporary's
/// element type). The outermost temporary is returned. If there are no such
/// levels, \p Addr is cast to \p BaseLVType and returned with alignment
/// \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  // Tmp: temporary created last (innermost); TopTmp: temporary of the
  // previous level; MostTopTmp: first (outermost) temporary.
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    // Link the previous level to the new temporary, or remember the new
    // temporary as the outermost one.
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  // Cast the address to what the innermost temporary holds, or to the base
  // lvalue type when no temporary was needed.
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
1154 
1155 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
1156   const VarDecl *OrigVD = nullptr;
1157   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
1158     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
1159     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
1160       Base = TempOASE->getBase()->IgnoreParenImpCasts();
1161     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1162       Base = TempASE->getBase()->IgnoreParenImpCasts();
1163     DE = cast<DeclRefExpr>(Base);
1164     OrigVD = cast<VarDecl>(DE->getDecl());
1165   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
1166     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
1167     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1168       Base = TempASE->getBase()->IgnoreParenImpCasts();
1169     DE = cast<DeclRefExpr>(Base);
1170     OrigVD = cast<VarDecl>(DE->getDecl());
1171   }
1172   return OrigVD;
1173 }
1174 
/// Adjust the private address of reduction item \p N so that it can stand in
/// for the base declaration of the item, and record that base declaration in
/// BaseDecls.
///
/// If the item is an array section or array subscript, the private pointer is
/// offset by the distance between the beginning of the base and the shared
/// item, and the result is wrapped by castToBase(). Otherwise \p PrivateAddr
/// is returned unchanged.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  // Only set by getBaseDecl() when it returns a non-null declaration.
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    // Adjustment = base begin - shared item begin.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress(CGF).getType());
    // Apply the same offset to the private pointer.
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  // Plain variable reference: the item itself is the base declaration.
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1200 
1201 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1202   const OMPDeclareReductionDecl *DRD =
1203       getReductionInit(ClausesData[N].ReductionOp);
1204   return DRD && DRD->getInitializer();
1205 }
1206 
1207 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1208   return CGF.EmitLoadOfPointerLValue(
1209       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1210       getThreadIDVariable()->getType()->castAs<PointerType>());
1211 }
1212 
1213 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
1214   if (!CGF.HaveInsertPoint())
1215     return;
1216   // 1.2.2 OpenMP Language Terminology
1217   // Structured block - An executable statement with a single entry at the
1218   // top and a single exit at the bottom.
1219   // The point of exit cannot be a branch out of the structured block.
1220   // longjmp() and throw() must not violate the entry/exit criteria.
1221   CGF.EHStack.pushTerminate();
1222   CodeGen(CGF);
1223   CGF.EHStack.popTerminate();
1224 }
1225 
1226 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1227     CodeGenFunction &CGF) {
1228   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1229                             getThreadIDVariable()->getType(),
1230                             AlignmentSource::Decl);
1231 }
1232 
1233 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1234                                        QualType FieldTy) {
1235   auto *Field = FieldDecl::Create(
1236       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1237       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1238       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1239   Field->setAccess(AS_public);
1240   DC->addDecl(Field);
1241   return Field;
1242 }
1243 
1244 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
1245                                  StringRef Separator)
1246     : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
1247       OffloadEntriesInfoManager(CGM) {
1248   ASTContext &C = CGM.getContext();
1249   RecordDecl *RD = C.buildImplicitRecord("ident_t");
1250   QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1251   RD->startDefinition();
1252   // reserved_1
1253   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1254   // flags
1255   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1256   // reserved_2
1257   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1258   // reserved_3
1259   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1260   // psource
1261   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
1262   RD->completeDefinition();
1263   IdentQTy = C.getRecordType(RD);
1264   IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
1265   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1266 
1267   loadOffloadInfoMetadata();
1268 }
1269 
1270 bool CGOpenMPRuntime::tryEmitDeclareVariant(const GlobalDecl &NewGD,
1271                                             const GlobalDecl &OldGD,
1272                                             llvm::GlobalValue *OrigAddr,
1273                                             bool IsForDefinition) {
1274   // Emit at least a definition for the aliasee if the the address of the
1275   // original function is requested.
1276   if (IsForDefinition || OrigAddr)
1277     (void)CGM.GetAddrOfGlobal(NewGD);
1278   StringRef NewMangledName = CGM.getMangledName(NewGD);
1279   llvm::GlobalValue *Addr = CGM.GetGlobalValue(NewMangledName);
1280   if (Addr && !Addr->isDeclaration()) {
1281     const auto *D = cast<FunctionDecl>(OldGD.getDecl());
1282     const CGFunctionInfo &FI = CGM.getTypes().arrangeGlobalDeclaration(NewGD);
1283     llvm::Type *DeclTy = CGM.getTypes().GetFunctionType(FI);
1284 
1285     // Create a reference to the named value.  This ensures that it is emitted
1286     // if a deferred decl.
1287     llvm::GlobalValue::LinkageTypes LT = CGM.getFunctionLinkage(OldGD);
1288 
1289     // Create the new alias itself, but don't set a name yet.
1290     auto *GA =
1291         llvm::GlobalAlias::create(DeclTy, 0, LT, "", Addr, &CGM.getModule());
1292 
1293     if (OrigAddr) {
1294       assert(OrigAddr->isDeclaration() && "Expected declaration");
1295 
1296       GA->takeName(OrigAddr);
1297       OrigAddr->replaceAllUsesWith(
1298           llvm::ConstantExpr::getBitCast(GA, OrigAddr->getType()));
1299       OrigAddr->eraseFromParent();
1300     } else {
1301       GA->setName(CGM.getMangledName(OldGD));
1302     }
1303 
1304     // Set attributes which are particular to an alias; this is a
1305     // specialization of the attributes which may be set on a global function.
1306     if (D->hasAttr<WeakAttr>() || D->hasAttr<WeakRefAttr>() ||
1307         D->isWeakImported())
1308       GA->setLinkage(llvm::Function::WeakAnyLinkage);
1309 
1310     CGM.SetCommonAttributes(OldGD, GA);
1311     return true;
1312   }
1313   return false;
1314 }
1315 
1316 void CGOpenMPRuntime::clear() {
1317   InternalVars.clear();
1318   // Clean non-target variable declarations possibly used only in debug info.
1319   for (const auto &Data : EmittedNonTargetVariables) {
1320     if (!Data.getValue().pointsToAliveValue())
1321       continue;
1322     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1323     if (!GV)
1324       continue;
1325     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1326       continue;
1327     GV->eraseFromParent();
1328   }
1329   // Emit aliases for the deferred aliasees.
1330   for (const auto &Pair : DeferredVariantFunction) {
1331     StringRef MangledName = CGM.getMangledName(Pair.second.second);
1332     llvm::GlobalValue *Addr = CGM.GetGlobalValue(MangledName);
1333     // If not able to emit alias, just emit original declaration.
1334     (void)tryEmitDeclareVariant(Pair.second.first, Pair.second.second, Addr,
1335                                 /*IsForDefinition=*/false);
1336   }
1337 }
1338 
1339 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1340   SmallString<128> Buffer;
1341   llvm::raw_svector_ostream OS(Buffer);
1342   StringRef Sep = FirstSeparator;
1343   for (StringRef Part : Parts) {
1344     OS << Sep << Part;
1345     Sep = Separator;
1346   }
1347   return std::string(OS.str());
1348 }
1349 
/// Emit the internal helper function implementing either the combiner or the
/// initializer of a user-defined reduction.
///
/// \param CGM Module the function is added to.
/// \param Ty Type of the reduction item; both parameters are restrict-
/// qualified pointers to it.
/// \param CombinerInitializer Expression emitted as the function body; may be
/// null, in which case no expression is emitted for it.
/// \param In Variable mapped to the pointee of the second ("in") parameter.
/// \param Out Variable mapped to the pointee of the first ("out") parameter.
/// \param IsCombiner True for a combiner, false for an initializer; selects
/// the function name and whether the initializer of \p Out is emitted.
/// \returns The emitted function.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *restrict out, Ty *restrict in);
  // Note that the 'out' parameter is pushed first.
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // The trailing empty part makes the name end with a separator.
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // When optimizing, force the helper to be inlined.
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // For initializers, first emit the non-trivial initializer of the 'Out'
  // variable (if it has one) into the mapped storage.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1406 
1407 void CGOpenMPRuntime::emitUserDefinedReduction(
1408     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1409   if (UDRMap.count(D) > 0)
1410     return;
1411   llvm::Function *Combiner = emitCombinerOrInitializer(
1412       CGM, D->getType(), D->getCombiner(),
1413       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1414       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1415       /*IsCombiner=*/true);
1416   llvm::Function *Initializer = nullptr;
1417   if (const Expr *Init = D->getInitializer()) {
1418     Initializer = emitCombinerOrInitializer(
1419         CGM, D->getType(),
1420         D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1421                                                                      : nullptr,
1422         cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1423         cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1424         /*IsCombiner=*/false);
1425   }
1426   UDRMap.try_emplace(D, Combiner, Initializer);
1427   if (CGF) {
1428     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1429     Decls.second.push_back(D);
1430   }
1431 }
1432 
1433 std::pair<llvm::Function *, llvm::Function *>
1434 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1435   auto I = UDRMap.find(D);
1436   if (I != UDRMap.end())
1437     return I->second;
1438   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1439   return UDRMap.lookup(D);
1440 }
1441 
1442 namespace {
1443 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1444 // Builder if one is present.
1445 struct PushAndPopStackRAII {
1446   PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
1447                       bool HasCancel)
1448       : OMPBuilder(OMPBuilder) {
1449     if (!OMPBuilder)
1450       return;
1451 
1452     // The following callback is the crucial part of clangs cleanup process.
1453     //
1454     // NOTE:
1455     // Once the OpenMPIRBuilder is used to create parallel regions (and
1456     // similar), the cancellation destination (Dest below) is determined via
1457     // IP. That means if we have variables to finalize we split the block at IP,
1458     // use the new block (=BB) as destination to build a JumpDest (via
1459     // getJumpDestInCurrentScope(BB)) which then is fed to
1460     // EmitBranchThroughCleanup. Furthermore, there will not be the need
1461     // to push & pop an FinalizationInfo object.
1462     // The FiniCB will still be needed but at the point where the
1463     // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
1464     auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
1465       assert(IP.getBlock()->end() == IP.getPoint() &&
1466              "Clang CG should cause non-terminated block!");
1467       CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1468       CGF.Builder.restoreIP(IP);
1469       CodeGenFunction::JumpDest Dest =
1470           CGF.getOMPCancelDestination(OMPD_parallel);
1471       CGF.EmitBranchThroughCleanup(Dest);
1472     };
1473 
1474     // TODO: Remove this once we emit parallel regions through the
1475     //       OpenMPIRBuilder as it can do this setup internally.
1476     llvm::OpenMPIRBuilder::FinalizationInfo FI(
1477         {FiniCB, OMPD_parallel, HasCancel});
1478     OMPBuilder->pushFinalizationCB(std::move(FI));
1479   }
1480   ~PushAndPopStackRAII() {
1481     if (OMPBuilder)
1482       OMPBuilder->popFinalizationCB();
1483   }
1484   llvm::OpenMPIRBuilder *OMPBuilder;
1485 };
1486 } // namespace
1487 
1488 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1489     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1490     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1491     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1492   assert(ThreadIDVar->getType()->isPointerType() &&
1493          "thread id variable must be of type kmp_int32 *");
1494   CodeGenFunction CGF(CGM, true);
1495   bool HasCancel = false;
1496   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1497     HasCancel = OPD->hasCancel();
1498   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1499     HasCancel = OPSD->hasCancel();
1500   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1501     HasCancel = OPFD->hasCancel();
1502   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1503     HasCancel = OPFD->hasCancel();
1504   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1505     HasCancel = OPFD->hasCancel();
1506   else if (const auto *OPFD =
1507                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1508     HasCancel = OPFD->hasCancel();
1509   else if (const auto *OPFD =
1510                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1511     HasCancel = OPFD->hasCancel();
1512 
1513   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1514   //       parallel region to make cancellation barriers work properly.
1515   llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder();
1516   PushAndPopStackRAII PSR(OMPBuilder, CGF, HasCancel);
1517   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1518                                     HasCancel, OutlinedHelperName);
1519   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1520   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1521 }
1522 
1523 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1524     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1525     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1526   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1527   return emitParallelOrTeamsOutlinedFunction(
1528       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1529 }
1530 
1531 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1532     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1533     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1534   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1535   return emitParallelOrTeamsOutlinedFunction(
1536       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1537 }
1538 
1539 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
1540     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1541     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1542     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1543     bool Tied, unsigned &NumberOfParts) {
1544   auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1545                                               PrePostActionTy &) {
1546     llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1547     llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1548     llvm::Value *TaskArgs[] = {
1549         UpLoc, ThreadID,
1550         CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1551                                     TaskTVar->getType()->castAs<PointerType>())
1552             .getPointer(CGF)};
1553     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
1554   };
1555   CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1556                                                             UntiedCodeGen);
1557   CodeGen.setAction(Action);
1558   assert(!ThreadIDVar->getType()->isPointerType() &&
1559          "thread id variable must be of type kmp_int32 for tasks");
1560   const OpenMPDirectiveKind Region =
1561       isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1562                                                       : OMPD_task;
1563   const CapturedStmt *CS = D.getCapturedStmt(Region);
1564   const auto *TD = dyn_cast<OMPTaskDirective>(&D);
1565   CodeGenFunction CGF(CGM, true);
1566   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1567                                         InnermostKind,
1568                                         TD ? TD->hasCancel() : false, Action);
1569   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1570   llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
1571   if (!Tied)
1572     NumberOfParts = Action.getNumberOfParts();
1573   return Res;
1574 }
1575 
1576 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1577                              const RecordDecl *RD, const CGRecordLayout &RL,
1578                              ArrayRef<llvm::Constant *> Data) {
1579   llvm::StructType *StructTy = RL.getLLVMType();
1580   unsigned PrevIdx = 0;
1581   ConstantInitBuilder CIBuilder(CGM);
1582   auto DI = Data.begin();
1583   for (const FieldDecl *FD : RD->fields()) {
1584     unsigned Idx = RL.getLLVMFieldNo(FD);
1585     // Fill the alignment.
1586     for (unsigned I = PrevIdx; I < Idx; ++I)
1587       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1588     PrevIdx = Idx + 1;
1589     Fields.add(*DI);
1590     ++DI;
1591   }
1592 }
1593 
1594 template <class... As>
1595 static llvm::GlobalVariable *
1596 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1597                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1598                    As &&... Args) {
1599   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1600   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1601   ConstantInitBuilder CIBuilder(CGM);
1602   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1603   buildStructValue(Fields, CGM, RD, RL, Data);
1604   return Fields.finishAndCreateGlobal(
1605       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1606       std::forward<As>(Args)...);
1607 }
1608 
1609 template <typename T>
1610 static void
1611 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1612                                          ArrayRef<llvm::Constant *> Data,
1613                                          T &Parent) {
1614   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1615   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1616   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1617   buildStructValue(Fields, CGM, RD, RL, Data);
1618   Fields.finishAndAddTo(Parent);
1619 }
1620 
1621 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
1622   CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
1623   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1624   FlagsTy FlagsKey(Flags, Reserved2Flags);
1625   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
1626   if (!Entry) {
1627     if (!DefaultOpenMPPSource) {
1628       // Initialize default location for psource field of ident_t structure of
1629       // all ident_t objects. Format is ";file;function;line;column;;".
1630       // Taken from
1631       // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
1632       DefaultOpenMPPSource =
1633           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
1634       DefaultOpenMPPSource =
1635           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
1636     }
1637 
1638     llvm::Constant *Data[] = {
1639         llvm::ConstantInt::getNullValue(CGM.Int32Ty),
1640         llvm::ConstantInt::get(CGM.Int32Ty, Flags),
1641         llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
1642         llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
1643     llvm::GlobalValue *DefaultOpenMPLocation =
1644         createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
1645                            llvm::GlobalValue::PrivateLinkage);
1646     DefaultOpenMPLocation->setUnnamedAddr(
1647         llvm::GlobalValue::UnnamedAddr::Global);
1648 
1649     OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
1650   }
1651   return Address(Entry, Align);
1652 }
1653 
1654 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1655                                              bool AtCurrentPoint) {
1656   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1657   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1658 
1659   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1660   if (AtCurrentPoint) {
1661     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1662         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1663   } else {
1664     Elem.second.ServiceInsertPt =
1665         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1666     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1667   }
1668 }
1669 
1670 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1671   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1672   if (Elem.second.ServiceInsertPt) {
1673     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1674     Elem.second.ServiceInsertPt = nullptr;
1675     Ptr->eraseFromParent();
1676   }
1677 }
1678 
1679 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1680                                                  SourceLocation Loc,
1681                                                  unsigned Flags) {
1682   Flags |= OMP_IDENT_KMPC;
1683   // If no debug info is generated - return global default location.
1684   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1685       Loc.isInvalid())
1686     return getOrCreateDefaultLocation(Flags).getPointer();
1687 
1688   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1689 
1690   CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
1691   Address LocValue = Address::invalid();
1692   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1693   if (I != OpenMPLocThreadIDMap.end())
1694     LocValue = Address(I->second.DebugLoc, Align);
1695 
1696   // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
1697   // GetOpenMPThreadID was called before this routine.
1698   if (!LocValue.isValid()) {
1699     // Generate "ident_t .kmpc_loc.addr;"
1700     Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
1701     auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1702     Elem.second.DebugLoc = AI.getPointer();
1703     LocValue = AI;
1704 
1705     if (!Elem.second.ServiceInsertPt)
1706       setLocThreadIdInsertPt(CGF);
1707     CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1708     CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1709     CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
1710                              CGF.getTypeSize(IdentQTy));
1711   }
1712 
1713   // char **psource = &.kmpc_loc_<flags>.addr.psource;
1714   LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
1715   auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
1716   LValue PSource =
1717       CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));
1718 
1719   llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
1720   if (OMPDebugLoc == nullptr) {
1721     SmallString<128> Buffer2;
1722     llvm::raw_svector_ostream OS2(Buffer2);
1723     // Build debug location
1724     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1725     OS2 << ";" << PLoc.getFilename() << ";";
1726     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1727       OS2 << FD->getQualifiedNameAsString();
1728     OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1729     OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
1730     OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
1731   }
1732   // *psource = ";<File>;<Function>;<Line>;<Column>;;";
1733   CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);
1734 
1735   // Our callers always pass this to a runtime function, so for
1736   // convenience, go ahead and return a naked pointer.
1737   return LocValue.getPointer();
1738 }
1739 
1740 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
1741                                           SourceLocation Loc) {
1742   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1743 
1744   llvm::Value *ThreadID = nullptr;
1745   // Check whether we've already cached a load of the thread id in this
1746   // function.
1747   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1748   if (I != OpenMPLocThreadIDMap.end()) {
1749     ThreadID = I->second.ThreadID;
1750     if (ThreadID != nullptr)
1751       return ThreadID;
1752   }
1753   // If exceptions are enabled, do not use parameter to avoid possible crash.
1754   if (auto *OMPRegionInfo =
1755           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1756     if (OMPRegionInfo->getThreadIDVariable()) {
1757       // Check if this an outlined function with thread id passed as argument.
1758       LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1759       llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
1760       if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1761           !CGF.getLangOpts().CXXExceptions ||
1762           CGF.Builder.GetInsertBlock() == TopBlock ||
1763           !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
1764           cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1765               TopBlock ||
1766           cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1767               CGF.Builder.GetInsertBlock()) {
1768         ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1769         // If value loaded in entry block, cache it and use it everywhere in
1770         // function.
1771         if (CGF.Builder.GetInsertBlock() == TopBlock) {
1772           auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1773           Elem.second.ThreadID = ThreadID;
1774         }
1775         return ThreadID;
1776       }
1777     }
1778   }
1779 
1780   // This is not an outlined function region - need to call __kmpc_int32
1781   // kmpc_global_thread_num(ident_t *loc).
1782   // Generate thread id value and cache this value for use across the
1783   // function.
1784   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1785   if (!Elem.second.ServiceInsertPt)
1786     setLocThreadIdInsertPt(CGF);
1787   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1788   CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1789   llvm::CallInst *Call = CGF.Builder.CreateCall(
1790       createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
1791       emitUpdateLocation(CGF, Loc));
1792   Call->setCallingConv(CGF.getRuntimeCC());
1793   Elem.second.ThreadID = Call;
1794   return Call;
1795 }
1796 
1797 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1798   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1799   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1800     clearLocThreadIdInsertPt(CGF);
1801     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1802   }
1803   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1804     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1805       UDRMap.erase(D);
1806     FunctionUDRMap.erase(CGF.CurFn);
1807   }
1808   auto I = FunctionUDMMap.find(CGF.CurFn);
1809   if (I != FunctionUDMMap.end()) {
1810     for(const auto *D : I->second)
1811       UDMMap.erase(D);
1812     FunctionUDMMap.erase(I);
1813   }
1814   LastprivateConditionalToTypes.erase(CGF.CurFn);
1815 }
1816 
1817 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1818   return IdentTy->getPointerTo();
1819 }
1820 
1821 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1822   if (!Kmpc_MicroTy) {
1823     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1824     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1825                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1826     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1827   }
1828   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1829 }
1830 
1831 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1832   llvm::FunctionCallee RTLFn = nullptr;
1833   switch (static_cast<OpenMPRTLFunction>(Function)) {
1834   case OMPRTL__kmpc_fork_call: {
1835     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1836     // microtask, ...);
1837     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1838                                 getKmpc_MicroPointerTy()};
1839     auto *FnTy =
1840         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1841     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1842     if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
1843       if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
1844         llvm::LLVMContext &Ctx = F->getContext();
1845         llvm::MDBuilder MDB(Ctx);
1846         // Annotate the callback behavior of the __kmpc_fork_call:
1847         //  - The callback callee is argument number 2 (microtask).
1848         //  - The first two arguments of the callback callee are unknown (-1).
1849         //  - All variadic arguments to the __kmpc_fork_call are passed to the
1850         //    callback callee.
1851         F->addMetadata(
1852             llvm::LLVMContext::MD_callback,
1853             *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
1854                                         2, {-1, -1},
1855                                         /* VarArgsArePassed */ true)}));
1856       }
1857     }
1858     break;
1859   }
1860   case OMPRTL__kmpc_global_thread_num: {
1861     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1862     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1863     auto *FnTy =
1864         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1865     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1866     break;
1867   }
1868   case OMPRTL__kmpc_threadprivate_cached: {
1869     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1870     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1871     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1872                                 CGM.VoidPtrTy, CGM.SizeTy,
1873                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1874     auto *FnTy =
1875         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1876     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1877     break;
1878   }
1879   case OMPRTL__kmpc_critical: {
1880     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1881     // kmp_critical_name *crit);
1882     llvm::Type *TypeParams[] = {
1883         getIdentTyPointerTy(), CGM.Int32Ty,
1884         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1885     auto *FnTy =
1886         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1887     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1888     break;
1889   }
1890   case OMPRTL__kmpc_critical_with_hint: {
1891     // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1892     // kmp_critical_name *crit, uintptr_t hint);
1893     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1894                                 llvm::PointerType::getUnqual(KmpCriticalNameTy),
1895                                 CGM.IntPtrTy};
1896     auto *FnTy =
1897         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1898     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1899     break;
1900   }
1901   case OMPRTL__kmpc_threadprivate_register: {
1902     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1903     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1904     // typedef void *(*kmpc_ctor)(void *);
1905     auto *KmpcCtorTy =
1906         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1907                                 /*isVarArg*/ false)->getPointerTo();
1908     // typedef void *(*kmpc_cctor)(void *, void *);
1909     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1910     auto *KmpcCopyCtorTy =
1911         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1912                                 /*isVarArg*/ false)
1913             ->getPointerTo();
1914     // typedef void (*kmpc_dtor)(void *);
1915     auto *KmpcDtorTy =
1916         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1917             ->getPointerTo();
1918     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1919                               KmpcCopyCtorTy, KmpcDtorTy};
1920     auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1921                                         /*isVarArg*/ false);
1922     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1923     break;
1924   }
1925   case OMPRTL__kmpc_end_critical: {
1926     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1927     // kmp_critical_name *crit);
1928     llvm::Type *TypeParams[] = {
1929         getIdentTyPointerTy(), CGM.Int32Ty,
1930         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1931     auto *FnTy =
1932         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1933     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1934     break;
1935   }
1936   case OMPRTL__kmpc_cancel_barrier: {
1937     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1938     // global_tid);
1939     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1940     auto *FnTy =
1941         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1942     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1943     break;
1944   }
1945   case OMPRTL__kmpc_barrier: {
1946     // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1947     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1948     auto *FnTy =
1949         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1950     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1951     break;
1952   }
1953   case OMPRTL__kmpc_for_static_fini: {
1954     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1955     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1956     auto *FnTy =
1957         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1958     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1959     break;
1960   }
1961   case OMPRTL__kmpc_push_num_threads: {
1962     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1963     // kmp_int32 num_threads)
1964     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1965                                 CGM.Int32Ty};
1966     auto *FnTy =
1967         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1968     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1969     break;
1970   }
1971   case OMPRTL__kmpc_serialized_parallel: {
1972     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1973     // global_tid);
1974     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1975     auto *FnTy =
1976         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1977     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1978     break;
1979   }
1980   case OMPRTL__kmpc_end_serialized_parallel: {
1981     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1982     // global_tid);
1983     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1984     auto *FnTy =
1985         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1986     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1987     break;
1988   }
1989   case OMPRTL__kmpc_flush: {
1990     // Build void __kmpc_flush(ident_t *loc);
1991     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1992     auto *FnTy =
1993         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1994     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1995     break;
1996   }
1997   case OMPRTL__kmpc_master: {
1998     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1999     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2000     auto *FnTy =
2001         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2002     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
2003     break;
2004   }
2005   case OMPRTL__kmpc_end_master: {
2006     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
2007     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2008     auto *FnTy =
2009         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2010     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
2011     break;
2012   }
2013   case OMPRTL__kmpc_omp_taskyield: {
2014     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
2015     // int end_part);
2016     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2017     auto *FnTy =
2018         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2019     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
2020     break;
2021   }
2022   case OMPRTL__kmpc_single: {
2023     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
2024     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2025     auto *FnTy =
2026         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2027     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
2028     break;
2029   }
2030   case OMPRTL__kmpc_end_single: {
2031     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
2032     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2033     auto *FnTy =
2034         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2035     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
2036     break;
2037   }
2038   case OMPRTL__kmpc_omp_task_alloc: {
2039     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
2040     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
2041     // kmp_routine_entry_t *task_entry);
2042     assert(KmpRoutineEntryPtrTy != nullptr &&
2043            "Type kmp_routine_entry_t must be created.");
2044     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2045                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
2046     // Return void * and then cast to particular kmp_task_t type.
2047     auto *FnTy =
2048         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2049     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
2050     break;
2051   }
2052   case OMPRTL__kmpc_omp_target_task_alloc: {
2053     // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32 gtid,
2054     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
2055     // kmp_routine_entry_t *task_entry, kmp_int64 device_id);
2056     assert(KmpRoutineEntryPtrTy != nullptr &&
2057            "Type kmp_routine_entry_t must be created.");
2058     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2059                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy,
2060                                 CGM.Int64Ty};
2061     // Return void * and then cast to particular kmp_task_t type.
2062     auto *FnTy =
2063         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2064     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_target_task_alloc");
2065     break;
2066   }
2067   case OMPRTL__kmpc_omp_task: {
2068     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2069     // *new_task);
2070     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2071                                 CGM.VoidPtrTy};
2072     auto *FnTy =
2073         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2074     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
2075     break;
2076   }
2077   case OMPRTL__kmpc_copyprivate: {
2078     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
2079     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
2080     // kmp_int32 didit);
2081     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2082     auto *CpyFnTy =
2083         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
2084     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
2085                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
2086                                 CGM.Int32Ty};
2087     auto *FnTy =
2088         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2089     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
2090     break;
2091   }
2092   case OMPRTL__kmpc_reduce: {
2093     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
2094     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
2095     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
2096     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2097     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
2098                                                /*isVarArg=*/false);
2099     llvm::Type *TypeParams[] = {
2100         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
2101         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
2102         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2103     auto *FnTy =
2104         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2105     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
2106     break;
2107   }
2108   case OMPRTL__kmpc_reduce_nowait: {
2109     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
2110     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
2111     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
2112     // *lck);
2113     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2114     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
2115                                                /*isVarArg=*/false);
2116     llvm::Type *TypeParams[] = {
2117         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
2118         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
2119         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2120     auto *FnTy =
2121         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2122     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
2123     break;
2124   }
2125   case OMPRTL__kmpc_end_reduce: {
2126     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
2127     // kmp_critical_name *lck);
2128     llvm::Type *TypeParams[] = {
2129         getIdentTyPointerTy(), CGM.Int32Ty,
2130         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2131     auto *FnTy =
2132         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2133     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
2134     break;
2135   }
2136   case OMPRTL__kmpc_end_reduce_nowait: {
    // Build void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *lck);
2139     llvm::Type *TypeParams[] = {
2140         getIdentTyPointerTy(), CGM.Int32Ty,
2141         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2142     auto *FnTy =
2143         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2144     RTLFn =
2145         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
2146     break;
2147   }
2148   case OMPRTL__kmpc_omp_task_begin_if0: {
    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
2151     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2152                                 CGM.VoidPtrTy};
2153     auto *FnTy =
2154         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2155     RTLFn =
2156         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
2157     break;
2158   }
2159   case OMPRTL__kmpc_omp_task_complete_if0: {
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
2162     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2163                                 CGM.VoidPtrTy};
2164     auto *FnTy =
2165         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2166     RTLFn = CGM.CreateRuntimeFunction(FnTy,
2167                                       /*Name=*/"__kmpc_omp_task_complete_if0");
2168     break;
2169   }
2170   case OMPRTL__kmpc_ordered: {
2171     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
2172     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2173     auto *FnTy =
2174         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2175     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
2176     break;
2177   }
2178   case OMPRTL__kmpc_end_ordered: {
2179     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
2180     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2181     auto *FnTy =
2182         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2183     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
2184     break;
2185   }
2186   case OMPRTL__kmpc_omp_taskwait: {
2187     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
2188     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2189     auto *FnTy =
2190         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2191     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
2192     break;
2193   }
2194   case OMPRTL__kmpc_taskgroup: {
2195     // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
2196     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2197     auto *FnTy =
2198         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2199     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
2200     break;
2201   }
2202   case OMPRTL__kmpc_end_taskgroup: {
2203     // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
2204     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2205     auto *FnTy =
2206         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2207     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
2208     break;
2209   }
2210   case OMPRTL__kmpc_push_proc_bind: {
2211     // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
2212     // int proc_bind)
2213     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2214     auto *FnTy =
2215         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2216     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
2217     break;
2218   }
2219   case OMPRTL__kmpc_omp_task_with_deps: {
2220     // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
2221     // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
2222     // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
2223     llvm::Type *TypeParams[] = {
2224         getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
2225         CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
2226     auto *FnTy =
2227         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2228     RTLFn =
2229         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
2230     break;
2231   }
2232   case OMPRTL__kmpc_omp_wait_deps: {
2233     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
2234     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
2235     // kmp_depend_info_t *noalias_dep_list);
2236     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2237                                 CGM.Int32Ty,           CGM.VoidPtrTy,
2238                                 CGM.Int32Ty,           CGM.VoidPtrTy};
2239     auto *FnTy =
2240         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2241     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
2242     break;
2243   }
2244   case OMPRTL__kmpc_cancellationpoint: {
2245     // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
2246     // global_tid, kmp_int32 cncl_kind)
2247     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2248     auto *FnTy =
2249         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2250     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
2251     break;
2252   }
2253   case OMPRTL__kmpc_cancel: {
2254     // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
2255     // kmp_int32 cncl_kind)
2256     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2257     auto *FnTy =
2258         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2259     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
2260     break;
2261   }
2262   case OMPRTL__kmpc_push_num_teams: {
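    // Build void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
    // kmp_int32 num_teams, kmp_int32 num_threads)
    // NOTE(review): the function type built below uses Int32Ty as its return
    // type, not void as this prototype states -- confirm against the runtime.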
2265     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2266         CGM.Int32Ty};
2267     auto *FnTy =
2268         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2269     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
2270     break;
2271   }
2272   case OMPRTL__kmpc_fork_teams: {
2273     // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
2274     // microtask, ...);
2275     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2276                                 getKmpc_MicroPointerTy()};
2277     auto *FnTy =
2278         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
2279     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
2280     if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
2281       if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
2282         llvm::LLVMContext &Ctx = F->getContext();
2283         llvm::MDBuilder MDB(Ctx);
2284         // Annotate the callback behavior of the __kmpc_fork_teams:
2285         //  - The callback callee is argument number 2 (microtask).
2286         //  - The first two arguments of the callback callee are unknown (-1).
2287         //  - All variadic arguments to the __kmpc_fork_teams are passed to the
2288         //    callback callee.
2289         F->addMetadata(
2290             llvm::LLVMContext::MD_callback,
2291             *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
2292                                         2, {-1, -1},
2293                                         /* VarArgsArePassed */ true)}));
2294       }
2295     }
2296     break;
2297   }
2298   case OMPRTL__kmpc_taskloop: {
2299     // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
2300     // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
2301     // sched, kmp_uint64 grainsize, void *task_dup);
2302     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2303                                 CGM.IntTy,
2304                                 CGM.VoidPtrTy,
2305                                 CGM.IntTy,
2306                                 CGM.Int64Ty->getPointerTo(),
2307                                 CGM.Int64Ty->getPointerTo(),
2308                                 CGM.Int64Ty,
2309                                 CGM.IntTy,
2310                                 CGM.IntTy,
2311                                 CGM.Int64Ty,
2312                                 CGM.VoidPtrTy};
2313     auto *FnTy =
2314         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2315     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
2316     break;
2317   }
2318   case OMPRTL__kmpc_doacross_init: {
2319     // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
2320     // num_dims, struct kmp_dim *dims);
2321     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2322                                 CGM.Int32Ty,
2323                                 CGM.Int32Ty,
2324                                 CGM.VoidPtrTy};
2325     auto *FnTy =
2326         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2327     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
2328     break;
2329   }
2330   case OMPRTL__kmpc_doacross_fini: {
2331     // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
2332     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2333     auto *FnTy =
2334         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2335     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
2336     break;
2337   }
2338   case OMPRTL__kmpc_doacross_post: {
2339     // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
2340     // *vec);
2341     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2342                                 CGM.Int64Ty->getPointerTo()};
2343     auto *FnTy =
2344         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2345     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
2346     break;
2347   }
2348   case OMPRTL__kmpc_doacross_wait: {
2349     // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
2350     // *vec);
2351     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2352                                 CGM.Int64Ty->getPointerTo()};
2353     auto *FnTy =
2354         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2355     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
2356     break;
2357   }
2358   case OMPRTL__kmpc_task_reduction_init: {
2359     // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
2360     // *data);
2361     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
2362     auto *FnTy =
2363         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2364     RTLFn =
2365         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
2366     break;
2367   }
2368   case OMPRTL__kmpc_task_reduction_get_th_data: {
2369     // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
2370     // *d);
2371     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2372     auto *FnTy =
2373         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2374     RTLFn = CGM.CreateRuntimeFunction(
2375         FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
2376     break;
2377   }
2378   case OMPRTL__kmpc_alloc: {
    // Build void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t
    // al); omp_allocator_handle_t type is void *.
2381     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy};
2382     auto *FnTy =
2383         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2384     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc");
2385     break;
2386   }
2387   case OMPRTL__kmpc_free: {
    // Build void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t
    // al); omp_allocator_handle_t type is void *.
2390     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2391     auto *FnTy =
2392         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2393     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free");
2394     break;
2395   }
2396   case OMPRTL__kmpc_push_target_tripcount: {
2397     // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
2398     // size);
2399     llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty};
2400     llvm::FunctionType *FnTy =
2401         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2402     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount");
2403     break;
2404   }
2405   case OMPRTL__tgt_target: {
2406     // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
2407     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2408     // *arg_types);
2409     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2410                                 CGM.VoidPtrTy,
2411                                 CGM.Int32Ty,
2412                                 CGM.VoidPtrPtrTy,
2413                                 CGM.VoidPtrPtrTy,
2414                                 CGM.Int64Ty->getPointerTo(),
2415                                 CGM.Int64Ty->getPointerTo()};
2416     auto *FnTy =
2417         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2418     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
2419     break;
2420   }
2421   case OMPRTL__tgt_target_nowait: {
2422     // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
2423     // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2424     // int64_t *arg_types);
2425     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2426                                 CGM.VoidPtrTy,
2427                                 CGM.Int32Ty,
2428                                 CGM.VoidPtrPtrTy,
2429                                 CGM.VoidPtrPtrTy,
2430                                 CGM.Int64Ty->getPointerTo(),
2431                                 CGM.Int64Ty->getPointerTo()};
2432     auto *FnTy =
2433         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2434     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
2435     break;
2436   }
2437   case OMPRTL__tgt_target_teams: {
2438     // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
2439     // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2440     // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2441     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2442                                 CGM.VoidPtrTy,
2443                                 CGM.Int32Ty,
2444                                 CGM.VoidPtrPtrTy,
2445                                 CGM.VoidPtrPtrTy,
2446                                 CGM.Int64Ty->getPointerTo(),
2447                                 CGM.Int64Ty->getPointerTo(),
2448                                 CGM.Int32Ty,
2449                                 CGM.Int32Ty};
2450     auto *FnTy =
2451         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2452     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
2453     break;
2454   }
2455   case OMPRTL__tgt_target_teams_nowait: {
2456     // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
2457     // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
2458     // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2459     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2460                                 CGM.VoidPtrTy,
2461                                 CGM.Int32Ty,
2462                                 CGM.VoidPtrPtrTy,
2463                                 CGM.VoidPtrPtrTy,
2464                                 CGM.Int64Ty->getPointerTo(),
2465                                 CGM.Int64Ty->getPointerTo(),
2466                                 CGM.Int32Ty,
2467                                 CGM.Int32Ty};
2468     auto *FnTy =
2469         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2470     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
2471     break;
2472   }
2473   case OMPRTL__tgt_register_requires: {
2474     // Build void __tgt_register_requires(int64_t flags);
2475     llvm::Type *TypeParams[] = {CGM.Int64Ty};
2476     auto *FnTy =
2477         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2478     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires");
2479     break;
2480   }
2481   case OMPRTL__tgt_target_data_begin: {
2482     // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
2483     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2484     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2485                                 CGM.Int32Ty,
2486                                 CGM.VoidPtrPtrTy,
2487                                 CGM.VoidPtrPtrTy,
2488                                 CGM.Int64Ty->getPointerTo(),
2489                                 CGM.Int64Ty->getPointerTo()};
2490     auto *FnTy =
2491         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2492     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
2493     break;
2494   }
2495   case OMPRTL__tgt_target_data_begin_nowait: {
2496     // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
2497     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2498     // *arg_types);
2499     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2500                                 CGM.Int32Ty,
2501                                 CGM.VoidPtrPtrTy,
2502                                 CGM.VoidPtrPtrTy,
2503                                 CGM.Int64Ty->getPointerTo(),
2504                                 CGM.Int64Ty->getPointerTo()};
2505     auto *FnTy =
2506         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2507     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
2508     break;
2509   }
2510   case OMPRTL__tgt_target_data_end: {
2511     // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
2512     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2513     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2514                                 CGM.Int32Ty,
2515                                 CGM.VoidPtrPtrTy,
2516                                 CGM.VoidPtrPtrTy,
2517                                 CGM.Int64Ty->getPointerTo(),
2518                                 CGM.Int64Ty->getPointerTo()};
2519     auto *FnTy =
2520         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2521     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
2522     break;
2523   }
2524   case OMPRTL__tgt_target_data_end_nowait: {
2525     // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
2526     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2527     // *arg_types);
2528     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2529                                 CGM.Int32Ty,
2530                                 CGM.VoidPtrPtrTy,
2531                                 CGM.VoidPtrPtrTy,
2532                                 CGM.Int64Ty->getPointerTo(),
2533                                 CGM.Int64Ty->getPointerTo()};
2534     auto *FnTy =
2535         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2536     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
2537     break;
2538   }
2539   case OMPRTL__tgt_target_data_update: {
2540     // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
2541     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2542     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2543                                 CGM.Int32Ty,
2544                                 CGM.VoidPtrPtrTy,
2545                                 CGM.VoidPtrPtrTy,
2546                                 CGM.Int64Ty->getPointerTo(),
2547                                 CGM.Int64Ty->getPointerTo()};
2548     auto *FnTy =
2549         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2550     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
2551     break;
2552   }
2553   case OMPRTL__tgt_target_data_update_nowait: {
2554     // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
2555     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2556     // *arg_types);
2557     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2558                                 CGM.Int32Ty,
2559                                 CGM.VoidPtrPtrTy,
2560                                 CGM.VoidPtrPtrTy,
2561                                 CGM.Int64Ty->getPointerTo(),
2562                                 CGM.Int64Ty->getPointerTo()};
2563     auto *FnTy =
2564         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2565     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
2566     break;
2567   }
2568   case OMPRTL__tgt_mapper_num_components: {
2569     // Build int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
2570     llvm::Type *TypeParams[] = {CGM.VoidPtrTy};
2571     auto *FnTy =
2572         llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false);
2573     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_mapper_num_components");
2574     break;
2575   }
2576   case OMPRTL__tgt_push_mapper_component: {
2577     // Build void __tgt_push_mapper_component(void *rt_mapper_handle, void
2578     // *base, void *begin, int64_t size, int64_t type);
2579     llvm::Type *TypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.VoidPtrTy,
2580                                 CGM.Int64Ty, CGM.Int64Ty};
2581     auto *FnTy =
2582         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2583     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_push_mapper_component");
2584     break;
2585   }
2586   }
2587   assert(RTLFn && "Unable to find OpenMP runtime function");
2588   return RTLFn;
2589 }
2590 
2591 llvm::FunctionCallee
2592 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
2593   assert((IVSize == 32 || IVSize == 64) &&
2594          "IV size is not compatible with the omp runtime");
2595   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
2596                                             : "__kmpc_for_static_init_4u")
2597                                 : (IVSigned ? "__kmpc_for_static_init_8"
2598                                             : "__kmpc_for_static_init_8u");
2599   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2600   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2601   llvm::Type *TypeParams[] = {
2602     getIdentTyPointerTy(),                     // loc
2603     CGM.Int32Ty,                               // tid
2604     CGM.Int32Ty,                               // schedtype
2605     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2606     PtrTy,                                     // p_lower
2607     PtrTy,                                     // p_upper
2608     PtrTy,                                     // p_stride
2609     ITy,                                       // incr
2610     ITy                                        // chunk
2611   };
2612   auto *FnTy =
2613       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2614   return CGM.CreateRuntimeFunction(FnTy, Name);
2615 }
2616 
2617 llvm::FunctionCallee
2618 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
2619   assert((IVSize == 32 || IVSize == 64) &&
2620          "IV size is not compatible with the omp runtime");
2621   StringRef Name =
2622       IVSize == 32
2623           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
2624           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
2625   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2626   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
2627                                CGM.Int32Ty,           // tid
2628                                CGM.Int32Ty,           // schedtype
2629                                ITy,                   // lower
2630                                ITy,                   // upper
2631                                ITy,                   // stride
2632                                ITy                    // chunk
2633   };
2634   auto *FnTy =
2635       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2636   return CGM.CreateRuntimeFunction(FnTy, Name);
2637 }
2638 
2639 llvm::FunctionCallee
2640 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
2641   assert((IVSize == 32 || IVSize == 64) &&
2642          "IV size is not compatible with the omp runtime");
2643   StringRef Name =
2644       IVSize == 32
2645           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
2646           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
2647   llvm::Type *TypeParams[] = {
2648       getIdentTyPointerTy(), // loc
2649       CGM.Int32Ty,           // tid
2650   };
2651   auto *FnTy =
2652       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2653   return CGM.CreateRuntimeFunction(FnTy, Name);
2654 }
2655 
2656 llvm::FunctionCallee
2657 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
2658   assert((IVSize == 32 || IVSize == 64) &&
2659          "IV size is not compatible with the omp runtime");
2660   StringRef Name =
2661       IVSize == 32
2662           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
2663           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
2664   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2665   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2666   llvm::Type *TypeParams[] = {
2667     getIdentTyPointerTy(),                     // loc
2668     CGM.Int32Ty,                               // tid
2669     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2670     PtrTy,                                     // p_lower
2671     PtrTy,                                     // p_upper
2672     PtrTy                                      // p_stride
2673   };
2674   auto *FnTy =
2675       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2676   return CGM.CreateRuntimeFunction(FnTy, Name);
2677 }
2678 
2679 /// Obtain information that uniquely identifies a target entry. This
2680 /// consists of the file and device IDs as well as line number associated with
2681 /// the relevant entry source location.
2682 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
2683                                      unsigned &DeviceID, unsigned &FileID,
2684                                      unsigned &LineNum) {
2685   SourceManager &SM = C.getSourceManager();
2686 
2687   // The loc should be always valid and have a file ID (the user cannot use
2688   // #pragma directives in macros)
2689 
2690   assert(Loc.isValid() && "Source location is expected to be always valid.");
2691 
2692   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
2693   assert(PLoc.isValid() && "Source location is expected to be always valid.");
2694 
2695   llvm::sys::fs::UniqueID ID;
2696   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
2697     SM.getDiagnostics().Report(diag::err_cannot_open_file)
2698         << PLoc.getFilename() << EC.message();
2699 
2700   DeviceID = ID.getDevice();
2701   FileID = ID.getFile();
2702   LineNum = PLoc.getLine();
2703 }
2704 
2705 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
2706   if (CGM.getLangOpts().OpenMPSimd)
2707     return Address::invalid();
2708   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
2709       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
2710   if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
2711               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
2712                HasRequiresUnifiedSharedMemory))) {
2713     SmallString<64> PtrName;
2714     {
2715       llvm::raw_svector_ostream OS(PtrName);
2716       OS << CGM.getMangledName(GlobalDecl(VD));
2717       if (!VD->isExternallyVisible()) {
2718         unsigned DeviceID, FileID, Line;
2719         getTargetEntryUniqueInfo(CGM.getContext(),
2720                                  VD->getCanonicalDecl()->getBeginLoc(),
2721                                  DeviceID, FileID, Line);
2722         OS << llvm::format("_%x", FileID);
2723       }
2724       OS << "_decl_tgt_ref_ptr";
2725     }
2726     llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
2727     if (!Ptr) {
2728       QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
2729       Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
2730                                         PtrName);
2731 
2732       auto *GV = cast<llvm::GlobalVariable>(Ptr);
2733       GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
2734 
2735       if (!CGM.getLangOpts().OpenMPIsDevice)
2736         GV->setInitializer(CGM.GetAddrOfGlobal(VD));
2737       registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
2738     }
2739     return Address(Ptr, CGM.getContext().getDeclAlign(VD));
2740   }
2741   return Address::invalid();
2742 }
2743 
2744 llvm::Constant *
2745 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
2746   assert(!CGM.getLangOpts().OpenMPUseTLS ||
2747          !CGM.getContext().getTargetInfo().isTLSSupported());
2748   // Lookup the entry, lazily creating it if necessary.
2749   std::string Suffix = getName({"cache", ""});
2750   return getOrCreateInternalVariable(
2751       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
2752 }
2753 
2754 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
2755                                                 const VarDecl *VD,
2756                                                 Address VDAddr,
2757                                                 SourceLocation Loc) {
2758   if (CGM.getLangOpts().OpenMPUseTLS &&
2759       CGM.getContext().getTargetInfo().isTLSSupported())
2760     return VDAddr;
2761 
2762   llvm::Type *VarTy = VDAddr.getElementType();
2763   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2764                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2765                                                        CGM.Int8PtrTy),
2766                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
2767                          getOrCreateThreadPrivateCache(VD)};
2768   return Address(CGF.EmitRuntimeCall(
2769       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2770                  VDAddr.getAlignment());
2771 }
2772 
2773 void CGOpenMPRuntime::emitThreadPrivateVarInit(
2774     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
2775     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
2776   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
2777   // library.
2778   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
2779   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
2780                       OMPLoc);
2781   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
2782   // to register constructor/destructor for variable.
2783   llvm::Value *Args[] = {
2784       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
2785       Ctor, CopyCtor, Dtor};
2786   CGF.EmitRuntimeCall(
2787       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
2788 }
2789 
2790 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
2791     const VarDecl *VD, Address VDAddr, SourceLocation Loc,
2792     bool PerformInit, CodeGenFunction *CGF) {
2793   if (CGM.getLangOpts().OpenMPUseTLS &&
2794       CGM.getContext().getTargetInfo().isTLSSupported())
2795     return nullptr;
2796 
2797   VD = VD->getDefinition(CGM.getContext());
2798   if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
2799     QualType ASTTy = VD->getType();
2800 
2801     llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
2802     const Expr *Init = VD->getAnyInitializer();
2803     if (CGM.getLangOpts().CPlusPlus && PerformInit) {
2804       // Generate function that re-emits the declaration's initializer into the
2805       // threadprivate copy of the variable VD
2806       CodeGenFunction CtorCGF(CGM);
2807       FunctionArgList Args;
2808       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
2809                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
2810                             ImplicitParamDecl::Other);
2811       Args.push_back(&Dst);
2812 
2813       const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
2814           CGM.getContext().VoidPtrTy, Args);
2815       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2816       std::string Name = getName({"__kmpc_global_ctor_", ""});
2817       llvm::Function *Fn =
2818           CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
2819       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
2820                             Args, Loc, Loc);
2821       llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
2822           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
2823           CGM.getContext().VoidPtrTy, Dst.getLocation());
2824       Address Arg = Address(ArgVal, VDAddr.getAlignment());
2825       Arg = CtorCGF.Builder.CreateElementBitCast(
2826           Arg, CtorCGF.ConvertTypeForMem(ASTTy));
2827       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
2828                                /*IsInitializer=*/true);
2829       ArgVal = CtorCGF.EmitLoadOfScalar(
2830           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
2831           CGM.getContext().VoidPtrTy, Dst.getLocation());
2832       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
2833       CtorCGF.FinishFunction();
2834       Ctor = Fn;
2835     }
2836     if (VD->getType().isDestructedType() != QualType::DK_none) {
2837       // Generate function that emits destructor call for the threadprivate copy
2838       // of the variable VD
2839       CodeGenFunction DtorCGF(CGM);
2840       FunctionArgList Args;
2841       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
2842                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
2843                             ImplicitParamDecl::Other);
2844       Args.push_back(&Dst);
2845 
2846       const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
2847           CGM.getContext().VoidTy, Args);
2848       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2849       std::string Name = getName({"__kmpc_global_dtor_", ""});
2850       llvm::Function *Fn =
2851           CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
2852       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
2853       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
2854                             Loc, Loc);
2855       // Create a scope with an artificial location for the body of this function.
2856       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
2857       llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
2858           DtorCGF.GetAddrOfLocalVar(&Dst),
2859           /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
2860       DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
2861                           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
2862                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
2863       DtorCGF.FinishFunction();
2864       Dtor = Fn;
2865     }
2866     // Do not emit init function if it is not required.
2867     if (!Ctor && !Dtor)
2868       return nullptr;
2869 
2870     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2871     auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
2872                                                /*isVarArg=*/false)
2873                            ->getPointerTo();
2874     // Copying constructor for the threadprivate variable.
2875     // Must be NULL - reserved by runtime, but currently it requires that this
2876     // parameter is always NULL. Otherwise it fires assertion.
2877     CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
2878     if (Ctor == nullptr) {
2879       auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
2880                                              /*isVarArg=*/false)
2881                          ->getPointerTo();
2882       Ctor = llvm::Constant::getNullValue(CtorTy);
2883     }
2884     if (Dtor == nullptr) {
2885       auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
2886                                              /*isVarArg=*/false)
2887                          ->getPointerTo();
2888       Dtor = llvm::Constant::getNullValue(DtorTy);
2889     }
2890     if (!CGF) {
2891       auto *InitFunctionTy =
2892           llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
2893       std::string Name = getName({"__omp_threadprivate_init_", ""});
2894       llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
2895           InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
2896       CodeGenFunction InitCGF(CGM);
2897       FunctionArgList ArgList;
2898       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
2899                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
2900                             Loc, Loc);
2901       emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
2902       InitCGF.FinishFunction();
2903       return InitFunction;
2904     }
2905     emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
2906   }
2907   return nullptr;
2908 }
2909 
2910 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
2911                                                      llvm::GlobalVariable *Addr,
2912                                                      bool PerformInit) {
2913   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
2914       !CGM.getLangOpts().OpenMPIsDevice)
2915     return false;
2916   Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
2917       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
2918   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
2919       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
2920        HasRequiresUnifiedSharedMemory))
2921     return CGM.getLangOpts().OpenMPIsDevice;
2922   VD = VD->getDefinition(CGM.getContext());
2923   if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
2924     return CGM.getLangOpts().OpenMPIsDevice;
2925 
2926   QualType ASTTy = VD->getType();
2927 
2928   SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
2929   // Produce the unique prefix to identify the new target regions. We use
2930   // the source location of the variable declaration which we know to not
2931   // conflict with any target region.
2932   unsigned DeviceID;
2933   unsigned FileID;
2934   unsigned Line;
2935   getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
2936   SmallString<128> Buffer, Out;
2937   {
2938     llvm::raw_svector_ostream OS(Buffer);
2939     OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
2940        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
2941   }
2942 
2943   const Expr *Init = VD->getAnyInitializer();
2944   if (CGM.getLangOpts().CPlusPlus && PerformInit) {
2945     llvm::Constant *Ctor;
2946     llvm::Constant *ID;
2947     if (CGM.getLangOpts().OpenMPIsDevice) {
2948       // Generate function that re-emits the declaration's initializer into
2949       // the threadprivate copy of the variable VD
2950       CodeGenFunction CtorCGF(CGM);
2951 
2952       const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
2953       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2954       llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
2955           FTy, Twine(Buffer, "_ctor"), FI, Loc);
2956       auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
2957       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
2958                             FunctionArgList(), Loc, Loc);
2959       auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
2960       CtorCGF.EmitAnyExprToMem(Init,
2961                                Address(Addr, CGM.getContext().getDeclAlign(VD)),
2962                                Init->getType().getQualifiers(),
2963                                /*IsInitializer=*/true);
2964       CtorCGF.FinishFunction();
2965       Ctor = Fn;
2966       ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
2967       CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
2968     } else {
2969       Ctor = new llvm::GlobalVariable(
2970           CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
2971           llvm::GlobalValue::PrivateLinkage,
2972           llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
2973       ID = Ctor;
2974     }
2975 
2976     // Register the information for the entry associated with the constructor.
2977     Out.clear();
2978     OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
2979         DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
2980         ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
2981   }
2982   if (VD->getType().isDestructedType() != QualType::DK_none) {
2983     llvm::Constant *Dtor;
2984     llvm::Constant *ID;
2985     if (CGM.getLangOpts().OpenMPIsDevice) {
2986       // Generate function that emits destructor call for the threadprivate
2987       // copy of the variable VD
2988       CodeGenFunction DtorCGF(CGM);
2989 
2990       const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
2991       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2992       llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
2993           FTy, Twine(Buffer, "_dtor"), FI, Loc);
2994       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
2995       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
2996                             FunctionArgList(), Loc, Loc);
2997       // Create a scope with an artificial location for the body of this
2998       // function.
2999       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
3000       DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
3001                           ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
3002                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
3003       DtorCGF.FinishFunction();
3004       Dtor = Fn;
3005       ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
3006       CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
3007     } else {
3008       Dtor = new llvm::GlobalVariable(
3009           CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
3010           llvm::GlobalValue::PrivateLinkage,
3011           llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
3012       ID = Dtor;
3013     }
3014     // Register the information for the entry associated with the destructor.
3015     Out.clear();
3016     OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
3017         DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
3018         ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
3019   }
3020   return CGM.getLangOpts().OpenMPIsDevice;
3021 }
3022 
3023 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
3024                                                           QualType VarType,
3025                                                           StringRef Name) {
3026   std::string Suffix = getName({"artificial", ""});
3027   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
3028   llvm::Value *GAddr =
3029       getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
3030   if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
3031       CGM.getTarget().isTLSSupported()) {
3032     cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
3033     return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
3034   }
3035   std::string CacheSuffix = getName({"cache", ""});
3036   llvm::Value *Args[] = {
3037       emitUpdateLocation(CGF, SourceLocation()),
3038       getThreadID(CGF, SourceLocation()),
3039       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
3040       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
3041                                 /*isSigned=*/false),
3042       getOrCreateInternalVariable(
3043           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
3044   return Address(
3045       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3046           CGF.EmitRuntimeCall(
3047               createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
3048           VarLVType->getPointerTo(/*AddrSpace=*/0)),
3049       CGM.getContext().getTypeAlignInChars(VarType));
3050 }
3051 
3052 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
3053                                    const RegionCodeGenTy &ThenGen,
3054                                    const RegionCodeGenTy &ElseGen) {
3055   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
3056 
3057   // If the condition constant folds and can be elided, try to avoid emitting
3058   // the condition and the dead arm of the if/else.
3059   bool CondConstant;
3060   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
3061     if (CondConstant)
3062       ThenGen(CGF);
3063     else
3064       ElseGen(CGF);
3065     return;
3066   }
3067 
3068   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
3069   // emit the conditional branch.
3070   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
3071   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
3072   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
3073   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
3074 
3075   // Emit the 'then' code.
3076   CGF.EmitBlock(ThenBlock);
3077   ThenGen(CGF);
3078   CGF.EmitBranch(ContBlock);
3079   // Emit the 'else' code if present.
3080   // There is no need to emit line number for unconditional branch.
3081   (void)ApplyDebugLocation::CreateEmpty(CGF);
3082   CGF.EmitBlock(ElseBlock);
3083   ElseGen(CGF);
3084   // There is no need to emit line number for unconditional branch.
3085   (void)ApplyDebugLocation::CreateEmpty(CGF);
3086   CGF.EmitBranch(ContBlock);
3087   // Emit the continuation block for code after the if.
3088   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
3089 }
3090 
3091 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
3092                                        llvm::Function *OutlinedFn,
3093                                        ArrayRef<llvm::Value *> CapturedVars,
3094                                        const Expr *IfCond) {
3095   if (!CGF.HaveInsertPoint())
3096     return;
3097   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
3098   auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
3099                                                      PrePostActionTy &) {
3100     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
3101     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
3102     llvm::Value *Args[] = {
3103         RTLoc,
3104         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
3105         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
3106     llvm::SmallVector<llvm::Value *, 16> RealArgs;
3107     RealArgs.append(std::begin(Args), std::end(Args));
3108     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
3109 
3110     llvm::FunctionCallee RTLFn =
3111         RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
3112     CGF.EmitRuntimeCall(RTLFn, RealArgs);
3113   };
3114   auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
3115                                                           PrePostActionTy &) {
3116     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
3117     llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
3118     // Build calls:
3119     // __kmpc_serialized_parallel(&Loc, GTid);
3120     llvm::Value *Args[] = {RTLoc, ThreadID};
3121     CGF.EmitRuntimeCall(
3122         RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
3123 
3124     // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
3125     Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
3126     Address ZeroAddrBound =
3127         CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
3128                                          /*Name=*/".bound.zero.addr");
3129     CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
3130     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
3131     // ThreadId for serialized parallels is 0.
3132     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
3133     OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
3134     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
3135     RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
3136 
3137     // __kmpc_end_serialized_parallel(&Loc, GTid);
3138     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
3139     CGF.EmitRuntimeCall(
3140         RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
3141         EndArgs);
3142   };
3143   if (IfCond) {
3144     emitIfClause(CGF, IfCond, ThenGen, ElseGen);
3145   } else {
3146     RegionCodeGenTy ThenRCG(ThenGen);
3147     ThenRCG(CGF);
3148   }
3149 }
3150 
3151 // If we're inside an (outlined) parallel region, use the region info's
3152 // thread-ID variable (it is passed in a first argument of the outlined function
3153 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
3154 // regular serial code region, get thread ID by calling kmp_int32
3155 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
3156 // return the address of that temp.
3157 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
3158                                              SourceLocation Loc) {
3159   if (auto *OMPRegionInfo =
3160           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3161     if (OMPRegionInfo->getThreadIDVariable())
3162       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
3163 
3164   llvm::Value *ThreadID = getThreadID(CGF, Loc);
3165   QualType Int32Ty =
3166       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
3167   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
3168   CGF.EmitStoreOfScalar(ThreadID,
3169                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
3170 
3171   return ThreadIDTemp;
3172 }
3173 
3174 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
3175     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
3176   SmallString<256> Buffer;
3177   llvm::raw_svector_ostream Out(Buffer);
3178   Out << Name;
3179   StringRef RuntimeName = Out.str();
3180   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
3181   if (Elem.second) {
3182     assert(Elem.second->getType()->getPointerElementType() == Ty &&
3183            "OMP internal variable has different type than requested");
3184     return &*Elem.second;
3185   }
3186 
3187   return Elem.second = new llvm::GlobalVariable(
3188              CGM.getModule(), Ty, /*IsConstant*/ false,
3189              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
3190              Elem.first(), /*InsertBefore=*/nullptr,
3191              llvm::GlobalValue::NotThreadLocal, AddressSpace);
3192 }
3193 
3194 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
3195   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
3196   std::string Name = getName({Prefix, "var"});
3197   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
3198 }
3199 
3200 namespace {
3201 /// Common pre(post)-action for different OpenMP constructs.
3202 class CommonActionTy final : public PrePostActionTy {
3203   llvm::FunctionCallee EnterCallee;
3204   ArrayRef<llvm::Value *> EnterArgs;
3205   llvm::FunctionCallee ExitCallee;
3206   ArrayRef<llvm::Value *> ExitArgs;
3207   bool Conditional;
3208   llvm::BasicBlock *ContBlock = nullptr;
3209 
3210 public:
3211   CommonActionTy(llvm::FunctionCallee EnterCallee,
3212                  ArrayRef<llvm::Value *> EnterArgs,
3213                  llvm::FunctionCallee ExitCallee,
3214                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
3215       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
3216         ExitArgs(ExitArgs), Conditional(Conditional) {}
3217   void Enter(CodeGenFunction &CGF) override {
3218     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
3219     if (Conditional) {
3220       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
3221       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
3222       ContBlock = CGF.createBasicBlock("omp_if.end");
3223       // Generate the branch (If-stmt)
3224       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
3225       CGF.EmitBlock(ThenBlock);
3226     }
3227   }
3228   void Done(CodeGenFunction &CGF) {
3229     // Emit the rest of blocks/branches
3230     CGF.EmitBranch(ContBlock);
3231     CGF.EmitBlock(ContBlock, true);
3232   }
3233   void Exit(CodeGenFunction &CGF) override {
3234     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
3235   }
3236 };
3237 } // anonymous namespace
3238 
3239 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
3240                                          StringRef CriticalName,
3241                                          const RegionCodeGenTy &CriticalOpGen,
3242                                          SourceLocation Loc, const Expr *Hint) {
3243   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
3244   // CriticalOpGen();
3245   // __kmpc_end_critical(ident_t *, gtid, Lock);
3246   // Prepare arguments and build a call to __kmpc_critical
3247   if (!CGF.HaveInsertPoint())
3248     return;
3249   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3250                          getCriticalRegionLock(CriticalName)};
3251   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
3252                                                 std::end(Args));
3253   if (Hint) {
3254     EnterArgs.push_back(CGF.Builder.CreateIntCast(
3255         CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
3256   }
3257   CommonActionTy Action(
3258       createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
3259                                  : OMPRTL__kmpc_critical),
3260       EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
3261   CriticalOpGen.setAction(Action);
3262   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
3263 }
3264 
3265 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
3266                                        const RegionCodeGenTy &MasterOpGen,
3267                                        SourceLocation Loc) {
3268   if (!CGF.HaveInsertPoint())
3269     return;
3270   // if(__kmpc_master(ident_t *, gtid)) {
3271   //   MasterOpGen();
3272   //   __kmpc_end_master(ident_t *, gtid);
3273   // }
3274   // Prepare arguments and build a call to __kmpc_master
3275   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3276   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
3277                         createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
3278                         /*Conditional=*/true);
3279   MasterOpGen.setAction(Action);
3280   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
3281   Action.Done(CGF);
3282 }
3283 
3284 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
3285                                         SourceLocation Loc) {
3286   if (!CGF.HaveInsertPoint())
3287     return;
3288   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
3289   llvm::Value *Args[] = {
3290       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3291       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
3292   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
3293   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3294     Region->emitUntiedSwitch(CGF);
3295 }
3296 
3297 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
3298                                           const RegionCodeGenTy &TaskgroupOpGen,
3299                                           SourceLocation Loc) {
3300   if (!CGF.HaveInsertPoint())
3301     return;
3302   // __kmpc_taskgroup(ident_t *, gtid);
3303   // TaskgroupOpGen();
3304   // __kmpc_end_taskgroup(ident_t *, gtid);
3305   // Prepare arguments and build a call to __kmpc_taskgroup
3306   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3307   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
3308                         createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
3309                         Args);
3310   TaskgroupOpGen.setAction(Action);
3311   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
3312 }
3313 
3314 /// Given an array of pointers to variables, project the address of a
3315 /// given variable.
3316 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
3317                                       unsigned Index, const VarDecl *Var) {
3318   // Pull out the pointer to the variable.
3319   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
3320   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
3321 
3322   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
3323   Addr = CGF.Builder.CreateElementBitCast(
3324       Addr, CGF.ConvertTypeForMem(Var->getType()));
3325   return Addr;
3326 }
3327 
3328 static llvm::Value *emitCopyprivateCopyFunction(
3329     CodeGenModule &CGM, llvm::Type *ArgsType,
3330     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
3331     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
3332     SourceLocation Loc) {
3333   ASTContext &C = CGM.getContext();
3334   // void copy_func(void *LHSArg, void *RHSArg);
3335   FunctionArgList Args;
3336   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
3337                            ImplicitParamDecl::Other);
3338   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
3339                            ImplicitParamDecl::Other);
3340   Args.push_back(&LHSArg);
3341   Args.push_back(&RHSArg);
3342   const auto &CGFI =
3343       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3344   std::string Name =
3345       CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
3346   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
3347                                     llvm::GlobalValue::InternalLinkage, Name,
3348                                     &CGM.getModule());
3349   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
3350   Fn->setDoesNotRecurse();
3351   CodeGenFunction CGF(CGM);
3352   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
3353   // Dest = (void*[n])(LHSArg);
3354   // Src = (void*[n])(RHSArg);
3355   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3356       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
3357       ArgsType), CGF.getPointerAlign());
3358   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3359       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
3360       ArgsType), CGF.getPointerAlign());
3361   // *(Type0*)Dst[0] = *(Type0*)Src[0];
3362   // *(Type1*)Dst[1] = *(Type1*)Src[1];
3363   // ...
3364   // *(Typen*)Dst[n] = *(Typen*)Src[n];
3365   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
3366     const auto *DestVar =
3367         cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
3368     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
3369 
3370     const auto *SrcVar =
3371         cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
3372     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
3373 
3374     const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
3375     QualType Type = VD->getType();
3376     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
3377   }
3378   CGF.FinishFunction();
3379   return Fn;
3380 }
3381 
3382 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
3383                                        const RegionCodeGenTy &SingleOpGen,
3384                                        SourceLocation Loc,
3385                                        ArrayRef<const Expr *> CopyprivateVars,
3386                                        ArrayRef<const Expr *> SrcExprs,
3387                                        ArrayRef<const Expr *> DstExprs,
3388                                        ArrayRef<const Expr *> AssignmentOps) {
3389   if (!CGF.HaveInsertPoint())
3390     return;
3391   assert(CopyprivateVars.size() == SrcExprs.size() &&
3392          CopyprivateVars.size() == DstExprs.size() &&
3393          CopyprivateVars.size() == AssignmentOps.size());
3394   ASTContext &C = CGM.getContext();
3395   // int32 did_it = 0;
3396   // if(__kmpc_single(ident_t *, gtid)) {
3397   //   SingleOpGen();
3398   //   __kmpc_end_single(ident_t *, gtid);
3399   //   did_it = 1;
3400   // }
3401   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
3402   // <copy_func>, did_it);
3403 
3404   Address DidIt = Address::invalid();
3405   if (!CopyprivateVars.empty()) {
3406     // int32 did_it = 0;
3407     QualType KmpInt32Ty =
3408         C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3409     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
3410     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
3411   }
3412   // Prepare arguments and build a call to __kmpc_single
3413   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3414   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
3415                         createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
3416                         /*Conditional=*/true);
3417   SingleOpGen.setAction(Action);
3418   emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
3419   if (DidIt.isValid()) {
3420     // did_it = 1;
3421     CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
3422   }
3423   Action.Done(CGF);
3424   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
3425   // <copy_func>, did_it);
3426   if (DidIt.isValid()) {
3427     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
3428     QualType CopyprivateArrayTy = C.getConstantArrayType(
3429         C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
3430         /*IndexTypeQuals=*/0);
3431     // Create a list of all private variables for copyprivate.
3432     Address CopyprivateList =
3433         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
3434     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
3435       Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
3436       CGF.Builder.CreateStore(
3437           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3438               CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
3439               CGF.VoidPtrTy),
3440           Elem);
3441     }
3442     // Build function that copies private values from single region to all other
3443     // threads in the corresponding parallel region.
3444     llvm::Value *CpyFn = emitCopyprivateCopyFunction(
3445         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
3446         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
3447     llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
3448     Address CL =
3449       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
3450                                                       CGF.VoidPtrTy);
3451     llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
3452     llvm::Value *Args[] = {
3453         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
3454         getThreadID(CGF, Loc),        // i32 <gtid>
3455         BufSize,                      // size_t <buf_size>
3456         CL.getPointer(),              // void *<copyprivate list>
3457         CpyFn,                        // void (*) (void *, void *) <copy_func>
3458         DidItVal                      // i32 did_it
3459     };
3460     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
3461   }
3462 }
3463 
3464 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
3465                                         const RegionCodeGenTy &OrderedOpGen,
3466                                         SourceLocation Loc, bool IsThreads) {
3467   if (!CGF.HaveInsertPoint())
3468     return;
3469   // __kmpc_ordered(ident_t *, gtid);
3470   // OrderedOpGen();
3471   // __kmpc_end_ordered(ident_t *, gtid);
3472   // Prepare arguments and build a call to __kmpc_ordered
3473   if (IsThreads) {
3474     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3475     CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
3476                           createRuntimeFunction(OMPRTL__kmpc_end_ordered),
3477                           Args);
3478     OrderedOpGen.setAction(Action);
3479     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3480     return;
3481   }
3482   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3483 }
3484 
3485 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
3486   unsigned Flags;
3487   if (Kind == OMPD_for)
3488     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
3489   else if (Kind == OMPD_sections)
3490     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
3491   else if (Kind == OMPD_single)
3492     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
3493   else if (Kind == OMPD_barrier)
3494     Flags = OMP_IDENT_BARRIER_EXPL;
3495   else
3496     Flags = OMP_IDENT_BARRIER_IMPL;
3497   return Flags;
3498 }
3499 
3500 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
3501     CodeGenFunction &CGF, const OMPLoopDirective &S,
3502     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
3503   // Check if the loop directive is actually a doacross loop directive. In this
3504   // case choose static, 1 schedule.
3505   if (llvm::any_of(
3506           S.getClausesOfKind<OMPOrderedClause>(),
3507           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
3508     ScheduleKind = OMPC_SCHEDULE_static;
3509     // Chunk size is 1 in this case.
3510     llvm::APInt ChunkSize(32, 1);
3511     ChunkExpr = IntegerLiteral::Create(
3512         CGF.getContext(), ChunkSize,
3513         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
3514         SourceLocation());
3515   }
3516 }
3517 
3518 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
3519                                       OpenMPDirectiveKind Kind, bool EmitChecks,
3520                                       bool ForceSimpleCall) {
3521   // Check if we should use the OMPBuilder
3522   auto *OMPRegionInfo =
3523       dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
3524   llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
3525   if (OMPBuilder) {
3526     CGF.Builder.restoreIP(OMPBuilder->CreateBarrier(
3527         CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
3528     return;
3529   }
3530 
3531   if (!CGF.HaveInsertPoint())
3532     return;
3533   // Build call __kmpc_cancel_barrier(loc, thread_id);
3534   // Build call __kmpc_barrier(loc, thread_id);
3535   unsigned Flags = getDefaultFlagsForBarriers(Kind);
3536   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
3537   // thread_id);
3538   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
3539                          getThreadID(CGF, Loc)};
3540   if (OMPRegionInfo) {
3541     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
3542       llvm::Value *Result = CGF.EmitRuntimeCall(
3543           createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
3544       if (EmitChecks) {
3545         // if (__kmpc_cancel_barrier()) {
3546         //   exit from construct;
3547         // }
3548         llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
3549         llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
3550         llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
3551         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
3552         CGF.EmitBlock(ExitBB);
3553         //   exit from construct;
3554         CodeGenFunction::JumpDest CancelDestination =
3555             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
3556         CGF.EmitBranchThroughCleanup(CancelDestination);
3557         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
3558       }
3559       return;
3560     }
3561   }
3562   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
3563 }
3564 
3565 /// Map the OpenMP loop schedule to the runtime enumeration.
3566 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
3567                                           bool Chunked, bool Ordered) {
3568   switch (ScheduleKind) {
3569   case OMPC_SCHEDULE_static:
3570     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
3571                    : (Ordered ? OMP_ord_static : OMP_sch_static);
3572   case OMPC_SCHEDULE_dynamic:
3573     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
3574   case OMPC_SCHEDULE_guided:
3575     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
3576   case OMPC_SCHEDULE_runtime:
3577     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
3578   case OMPC_SCHEDULE_auto:
3579     return Ordered ? OMP_ord_auto : OMP_sch_auto;
3580   case OMPC_SCHEDULE_unknown:
3581     assert(!Chunked && "chunk was specified but schedule kind not known");
3582     return Ordered ? OMP_ord_static : OMP_sch_static;
3583   }
3584   llvm_unreachable("Unexpected runtime schedule");
3585 }
3586 
3587 /// Map the OpenMP distribute schedule to the runtime enumeration.
3588 static OpenMPSchedType
3589 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
3590   // only static is allowed for dist_schedule
3591   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
3592 }
3593 
3594 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
3595                                          bool Chunked) const {
3596   OpenMPSchedType Schedule =
3597       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3598   return Schedule == OMP_sch_static;
3599 }
3600 
3601 bool CGOpenMPRuntime::isStaticNonchunked(
3602     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3603   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3604   return Schedule == OMP_dist_sch_static;
3605 }
3606 
3607 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
3608                                       bool Chunked) const {
3609   OpenMPSchedType Schedule =
3610       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3611   return Schedule == OMP_sch_static_chunked;
3612 }
3613 
3614 bool CGOpenMPRuntime::isStaticChunked(
3615     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3616   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3617   return Schedule == OMP_dist_sch_static_chunked;
3618 }
3619 
3620 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
3621   OpenMPSchedType Schedule =
3622       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
3623   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
3624   return Schedule != OMP_sch_static;
3625 }
3626 
3627 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
3628                                   OpenMPScheduleClauseModifier M1,
3629                                   OpenMPScheduleClauseModifier M2) {
3630   int Modifier = 0;
3631   switch (M1) {
3632   case OMPC_SCHEDULE_MODIFIER_monotonic:
3633     Modifier = OMP_sch_modifier_monotonic;
3634     break;
3635   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3636     Modifier = OMP_sch_modifier_nonmonotonic;
3637     break;
3638   case OMPC_SCHEDULE_MODIFIER_simd:
3639     if (Schedule == OMP_sch_static_chunked)
3640       Schedule = OMP_sch_static_balanced_chunked;
3641     break;
3642   case OMPC_SCHEDULE_MODIFIER_last:
3643   case OMPC_SCHEDULE_MODIFIER_unknown:
3644     break;
3645   }
3646   switch (M2) {
3647   case OMPC_SCHEDULE_MODIFIER_monotonic:
3648     Modifier = OMP_sch_modifier_monotonic;
3649     break;
3650   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3651     Modifier = OMP_sch_modifier_nonmonotonic;
3652     break;
3653   case OMPC_SCHEDULE_MODIFIER_simd:
3654     if (Schedule == OMP_sch_static_chunked)
3655       Schedule = OMP_sch_static_balanced_chunked;
3656     break;
3657   case OMPC_SCHEDULE_MODIFIER_last:
3658   case OMPC_SCHEDULE_MODIFIER_unknown:
3659     break;
3660   }
3661   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
3662   // If the static schedule kind is specified or if the ordered clause is
3663   // specified, and if the nonmonotonic modifier is not specified, the effect is
3664   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
3665   // modifier is specified, the effect is as if the nonmonotonic modifier is
3666   // specified.
3667   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
3668     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
3669           Schedule == OMP_sch_static_balanced_chunked ||
3670           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
3671           Schedule == OMP_dist_sch_static_chunked ||
3672           Schedule == OMP_dist_sch_static))
3673       Modifier = OMP_sch_modifier_nonmonotonic;
3674   }
3675   return Schedule | Modifier;
3676 }
3677 
3678 void CGOpenMPRuntime::emitForDispatchInit(
3679     CodeGenFunction &CGF, SourceLocation Loc,
3680     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
3681     bool Ordered, const DispatchRTInput &DispatchValues) {
3682   if (!CGF.HaveInsertPoint())
3683     return;
3684   OpenMPSchedType Schedule = getRuntimeSchedule(
3685       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
3686   assert(Ordered ||
3687          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
3688           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
3689           Schedule != OMP_sch_static_balanced_chunked));
3690   // Call __kmpc_dispatch_init(
3691   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
3692   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
3693   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
3694 
3695   // If the Chunk was not specified in the clause - use default value 1.
3696   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
3697                                             : CGF.Builder.getIntN(IVSize, 1);
3698   llvm::Value *Args[] = {
3699       emitUpdateLocation(CGF, Loc),
3700       getThreadID(CGF, Loc),
3701       CGF.Builder.getInt32(addMonoNonMonoModifier(
3702           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
3703       DispatchValues.LB,                                     // Lower
3704       DispatchValues.UB,                                     // Upper
3705       CGF.Builder.getIntN(IVSize, 1),                        // Stride
3706       Chunk                                                  // Chunk
3707   };
3708   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
3709 }
3710 
3711 static void emitForStaticInitCall(
3712     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
3713     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
3714     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
3715     const CGOpenMPRuntime::StaticRTInput &Values) {
3716   if (!CGF.HaveInsertPoint())
3717     return;
3718 
3719   assert(!Values.Ordered);
3720   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
3721          Schedule == OMP_sch_static_balanced_chunked ||
3722          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
3723          Schedule == OMP_dist_sch_static ||
3724          Schedule == OMP_dist_sch_static_chunked);
3725 
3726   // Call __kmpc_for_static_init(
3727   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
3728   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
3729   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
3730   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
3731   llvm::Value *Chunk = Values.Chunk;
3732   if (Chunk == nullptr) {
3733     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
3734             Schedule == OMP_dist_sch_static) &&
3735            "expected static non-chunked schedule");
3736     // If the Chunk was not specified in the clause - use default value 1.
3737     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
3738   } else {
3739     assert((Schedule == OMP_sch_static_chunked ||
3740             Schedule == OMP_sch_static_balanced_chunked ||
3741             Schedule == OMP_ord_static_chunked ||
3742             Schedule == OMP_dist_sch_static_chunked) &&
3743            "expected static chunked schedule");
3744   }
3745   llvm::Value *Args[] = {
3746       UpdateLocation,
3747       ThreadId,
3748       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
3749                                                   M2)), // Schedule type
3750       Values.IL.getPointer(),                           // &isLastIter
3751       Values.LB.getPointer(),                           // &LB
3752       Values.UB.getPointer(),                           // &UB
3753       Values.ST.getPointer(),                           // &Stride
3754       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
3755       Chunk                                             // Chunk
3756   };
3757   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
3758 }
3759 
3760 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
3761                                         SourceLocation Loc,
3762                                         OpenMPDirectiveKind DKind,
3763                                         const OpenMPScheduleTy &ScheduleKind,
3764                                         const StaticRTInput &Values) {
3765   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
3766       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
3767   assert(isOpenMPWorksharingDirective(DKind) &&
3768          "Expected loop-based or sections-based directive.");
3769   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
3770                                              isOpenMPLoopDirective(DKind)
3771                                                  ? OMP_IDENT_WORK_LOOP
3772                                                  : OMP_IDENT_WORK_SECTIONS);
3773   llvm::Value *ThreadId = getThreadID(CGF, Loc);
3774   llvm::FunctionCallee StaticInitFunction =
3775       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3776   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
3777   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3778                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
3779 }
3780 
3781 void CGOpenMPRuntime::emitDistributeStaticInit(
3782     CodeGenFunction &CGF, SourceLocation Loc,
3783     OpenMPDistScheduleClauseKind SchedKind,
3784     const CGOpenMPRuntime::StaticRTInput &Values) {
3785   OpenMPSchedType ScheduleNum =
3786       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
3787   llvm::Value *UpdatedLocation =
3788       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
3789   llvm::Value *ThreadId = getThreadID(CGF, Loc);
3790   llvm::FunctionCallee StaticInitFunction =
3791       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3792   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3793                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
3794                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
3795 }
3796 
3797 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
3798                                           SourceLocation Loc,
3799                                           OpenMPDirectiveKind DKind) {
3800   if (!CGF.HaveInsertPoint())
3801     return;
3802   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
3803   llvm::Value *Args[] = {
3804       emitUpdateLocation(CGF, Loc,
3805                          isOpenMPDistributeDirective(DKind)
3806                              ? OMP_IDENT_WORK_DISTRIBUTE
3807                              : isOpenMPLoopDirective(DKind)
3808                                    ? OMP_IDENT_WORK_LOOP
3809                                    : OMP_IDENT_WORK_SECTIONS),
3810       getThreadID(CGF, Loc)};
3811   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
3812   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
3813                       Args);
3814 }
3815 
3816 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
3817                                                  SourceLocation Loc,
3818                                                  unsigned IVSize,
3819                                                  bool IVSigned) {
3820   if (!CGF.HaveInsertPoint())
3821     return;
3822   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
3823   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3824   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
3825 }
3826 
3827 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
3828                                           SourceLocation Loc, unsigned IVSize,
3829                                           bool IVSigned, Address IL,
3830                                           Address LB, Address UB,
3831                                           Address ST) {
3832   // Call __kmpc_dispatch_next(
3833   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
3834   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
3835   //          kmp_int[32|64] *p_stride);
3836   llvm::Value *Args[] = {
3837       emitUpdateLocation(CGF, Loc),
3838       getThreadID(CGF, Loc),
3839       IL.getPointer(), // &isLastIter
3840       LB.getPointer(), // &Lower
3841       UB.getPointer(), // &Upper
3842       ST.getPointer()  // &Stride
3843   };
3844   llvm::Value *Call =
3845       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
3846   return CGF.EmitScalarConversion(
3847       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
3848       CGF.getContext().BoolTy, Loc);
3849 }
3850 
3851 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
3852                                            llvm::Value *NumThreads,
3853                                            SourceLocation Loc) {
3854   if (!CGF.HaveInsertPoint())
3855     return;
3856   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
3857   llvm::Value *Args[] = {
3858       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3859       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
3860   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
3861                       Args);
3862 }
3863 
3864 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
3865                                          ProcBindKind ProcBind,
3866                                          SourceLocation Loc) {
3867   if (!CGF.HaveInsertPoint())
3868     return;
3869   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
3870   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
3871   llvm::Value *Args[] = {
3872       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3873       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
3874   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
3875 }
3876 
3877 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
3878                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
3879   llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
3880   if (OMPBuilder) {
3881     OMPBuilder->CreateFlush(CGF.Builder);
3882   } else {
3883     if (!CGF.HaveInsertPoint())
3884       return;
3885     // Build call void __kmpc_flush(ident_t *loc)
3886     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
3887                         emitUpdateLocation(CGF, Loc));
3888   }
3889 }
3890 
namespace {
/// Indexes of fields for type kmp_task_t.
///
/// The enumerators have no explicit values, so they are numbered
/// consecutively from zero in declaration order; reordering them changes
/// every index.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  // NOTE(review): the generic name presumably mirrors a 'data1' member of the
  // runtime's task structure -- confirm against the runtime headers.
  Data1,
  /// Task priority.
  // NOTE(review): presumably mirrors a 'data2' member of the runtime's task
  // structure -- confirm against the runtime headers.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
3916 
3917 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3918   return OffloadEntriesTargetRegion.empty() &&
3919          OffloadEntriesDeviceGlobalVar.empty();
3920 }
3921 
3922 /// Initialize target region entry.
3923 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3924     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3925                                     StringRef ParentName, unsigned LineNum,
3926                                     unsigned Order) {
3927   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3928                                              "only required for the device "
3929                                              "code generation.");
3930   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3931       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3932                                    OMPTargetRegionEntryTargetRegion);
3933   ++OffloadingEntriesNum;
3934 }
3935 
3936 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3937     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3938                                   StringRef ParentName, unsigned LineNum,
3939                                   llvm::Constant *Addr, llvm::Constant *ID,
3940                                   OMPTargetRegionEntryKind Flags) {
3941   // If we are emitting code for a target, the entry is already initialized,
3942   // only has to be registered.
3943   if (CGM.getLangOpts().OpenMPIsDevice) {
3944     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
3945       unsigned DiagID = CGM.getDiags().getCustomDiagID(
3946           DiagnosticsEngine::Error,
3947           "Unable to find target region on line '%0' in the device code.");
3948       CGM.getDiags().Report(DiagID) << LineNum;
3949       return;
3950     }
3951     auto &Entry =
3952         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3953     assert(Entry.isValid() && "Entry not initialized!");
3954     Entry.setAddress(Addr);
3955     Entry.setID(ID);
3956     Entry.setFlags(Flags);
3957   } else {
3958     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3959     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3960     ++OffloadingEntriesNum;
3961   }
3962 }
3963 
3964 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3965     unsigned DeviceID, unsigned FileID, StringRef ParentName,
3966     unsigned LineNum) const {
3967   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3968   if (PerDevice == OffloadEntriesTargetRegion.end())
3969     return false;
3970   auto PerFile = PerDevice->second.find(FileID);
3971   if (PerFile == PerDevice->second.end())
3972     return false;
3973   auto PerParentName = PerFile->second.find(ParentName);
3974   if (PerParentName == PerFile->second.end())
3975     return false;
3976   auto PerLine = PerParentName->second.find(LineNum);
3977   if (PerLine == PerParentName->second.end())
3978     return false;
3979   // Fail if this entry is already registered.
3980   if (PerLine->second.getAddress() || PerLine->second.getID())
3981     return false;
3982   return true;
3983 }
3984 
3985 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3986     const OffloadTargetRegionEntryInfoActTy &Action) {
3987   // Scan all target region entries and perform the provided action.
3988   for (const auto &D : OffloadEntriesTargetRegion)
3989     for (const auto &F : D.second)
3990       for (const auto &P : F.second)
3991         for (const auto &L : P.second)
3992           Action(D.first, F.first, P.first(), L.first, L.second);
3993 }
3994 
3995 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3996     initializeDeviceGlobalVarEntryInfo(StringRef Name,
3997                                        OMPTargetGlobalVarEntryKind Flags,
3998                                        unsigned Order) {
3999   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
4000                                              "only required for the device "
4001                                              "code generation.");
4002   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
4003   ++OffloadingEntriesNum;
4004 }
4005 
4006 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
4007     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
4008                                      CharUnits VarSize,
4009                                      OMPTargetGlobalVarEntryKind Flags,
4010                                      llvm::GlobalValue::LinkageTypes Linkage) {
4011   if (CGM.getLangOpts().OpenMPIsDevice) {
4012     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
4013     assert(Entry.isValid() && Entry.getFlags() == Flags &&
4014            "Entry not initialized!");
4015     assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
4016            "Resetting with the new address.");
4017     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
4018       if (Entry.getVarSize().isZero()) {
4019         Entry.setVarSize(VarSize);
4020         Entry.setLinkage(Linkage);
4021       }
4022       return;
4023     }
4024     Entry.setVarSize(VarSize);
4025     Entry.setLinkage(Linkage);
4026     Entry.setAddress(Addr);
4027   } else {
4028     if (hasDeviceGlobalVarEntryInfo(VarName)) {
4029       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
4030       assert(Entry.isValid() && Entry.getFlags() == Flags &&
4031              "Entry not initialized!");
4032       assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
4033              "Resetting with the new address.");
4034       if (Entry.getVarSize().isZero()) {
4035         Entry.setVarSize(VarSize);
4036         Entry.setLinkage(Linkage);
4037       }
4038       return;
4039     }
4040     OffloadEntriesDeviceGlobalVar.try_emplace(
4041         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
4042     ++OffloadingEntriesNum;
4043   }
4044 }
4045 
4046 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
4047     actOnDeviceGlobalVarEntriesInfo(
4048         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
4049   // Scan all target region entries and perform the provided action.
4050   for (const auto &E : OffloadEntriesDeviceGlobalVar)
4051     Action(E.getKey(), E.getValue());
4052 }
4053 
4054 void CGOpenMPRuntime::createOffloadEntry(
4055     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
4056     llvm::GlobalValue::LinkageTypes Linkage) {
4057   StringRef Name = Addr->getName();
4058   llvm::Module &M = CGM.getModule();
4059   llvm::LLVMContext &C = M.getContext();
4060 
4061   // Create constant string with the name.
4062   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
4063 
4064   std::string StringName = getName({"omp_offloading", "entry_name"});
4065   auto *Str = new llvm::GlobalVariable(
4066       M, StrPtrInit->getType(), /*isConstant=*/true,
4067       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
4068   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
4069 
4070   llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
4071                             llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
4072                             llvm::ConstantInt::get(CGM.SizeTy, Size),
4073                             llvm::ConstantInt::get(CGM.Int32Ty, Flags),
4074                             llvm::ConstantInt::get(CGM.Int32Ty, 0)};
4075   std::string EntryName = getName({"omp_offloading", "entry", ""});
4076   llvm::GlobalVariable *Entry = createGlobalStruct(
4077       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
4078       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
4079 
4080   // The entry has to be created in the section the linker expects it to be.
4081   Entry->setSection("omp_offloading_entries");
4082 }
4083 
/// Emit the offload entries for all registered target regions and declare
/// target variables, and describe each of them in the "omp_offload.info"
/// named metadata of the current module.
///
/// Does nothing in simd-only mode or when no entries were recorded. Entries
/// are first collected into a vector indexed by their creation order and then
/// emitted in that order; incomplete entries are diagnosed or skipped instead
/// of being emitted.
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for functions that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Both vectors are pre-sized to the number of entries and are indexed by
  // the entry's order. OrderedEntries holds (entry, source location, name);
  // ParentFunctions is only filled in for target region entries.
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Recover a source location for diagnostics by searching the source
        // manager's files for the one whose unique ID matches the device/file
        // ID pair. Loc stays invalid if no file matches.
        // Note: the loop-local iterator named E shadows the entry parameter E
        // for the duration of the loop only.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        // Global variable entries are stored with an invalid source location.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Emit the actual offload entries in creation order.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      // Target regions are emitted with size 0.
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      // Note: 'continue' inside this switch continues the enclosing for loop,
      // i.e. it skips emission of the entry; 'break' falls through to
      // createOffloadEntry below.
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        // No entry is emitted on the device when unified shared memory is
        // required.
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        // Link entries are expected to have an address on the host and none
        // on the device; they are only emitted on the host.
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        // Only reachable with a null address when assertions are disabled.
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      // For global variables the address doubles as the entry ID.
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
4257 
4258 /// Loads all the offload entries information from the host IR
4259 /// metadata.
4260 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
4261   // If we are in target mode, load the metadata from the host IR. This code has
4262   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
4263 
4264   if (!CGM.getLangOpts().OpenMPIsDevice)
4265     return;
4266 
4267   if (CGM.getLangOpts().OMPHostIRFile.empty())
4268     return;
4269 
4270   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
4271   if (auto EC = Buf.getError()) {
4272     CGM.getDiags().Report(diag::err_cannot_open_file)
4273         << CGM.getLangOpts().OMPHostIRFile << EC.message();
4274     return;
4275   }
4276 
4277   llvm::LLVMContext C;
4278   auto ME = expectedToErrorOrAndEmitErrors(
4279       C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
4280 
4281   if (auto EC = ME.getError()) {
4282     unsigned DiagID = CGM.getDiags().getCustomDiagID(
4283         DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
4284     CGM.getDiags().Report(DiagID)
4285         << CGM.getLangOpts().OMPHostIRFile << EC.message();
4286     return;
4287   }
4288 
4289   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
4290   if (!MD)
4291     return;
4292 
4293   for (llvm::MDNode *MN : MD->operands()) {
4294     auto &&GetMDInt = [MN](unsigned Idx) {
4295       auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
4296       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
4297     };
4298 
4299     auto &&GetMDString = [MN](unsigned Idx) {
4300       auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
4301       return V->getString();
4302     };
4303 
4304     switch (GetMDInt(0)) {
4305     default:
4306       llvm_unreachable("Unexpected metadata!");
4307       break;
4308     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
4309         OffloadingEntryInfoTargetRegion:
4310       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
4311           /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
4312           /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
4313           /*Order=*/GetMDInt(5));
4314       break;
4315     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
4316         OffloadingEntryInfoDeviceGlobalVar:
4317       OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
4318           /*MangledName=*/GetMDString(1),
4319           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
4320               /*Flags=*/GetMDInt(2)),
4321           /*Order=*/GetMDInt(3));
4322       break;
4323     }
4324   }
4325 }
4326 
4327 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
4328   if (!KmpRoutineEntryPtrTy) {
4329     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
4330     ASTContext &C = CGM.getContext();
4331     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
4332     FunctionProtoType::ExtProtoInfo EPI;
4333     KmpRoutineEntryPtrQTy = C.getPointerType(
4334         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
4335     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
4336   }
4337 }
4338 
4339 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
4340   // Make sure the type of the entry is already created. This is the type we
4341   // have to create:
4342   // struct __tgt_offload_entry{
4343   //   void      *addr;       // Pointer to the offload entry info.
4344   //                          // (function or global)
4345   //   char      *name;       // Name of the function or global.
4346   //   size_t     size;       // Size of the entry info (0 if it a function).
4347   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
4348   //   int32_t    reserved;   // Reserved, to use by the runtime library.
4349   // };
4350   if (TgtOffloadEntryQTy.isNull()) {
4351     ASTContext &C = CGM.getContext();
4352     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
4353     RD->startDefinition();
4354     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4355     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
4356     addFieldToRecordDecl(C, RD, C.getSizeType());
4357     addFieldToRecordDecl(
4358         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4359     addFieldToRecordDecl(
4360         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4361     RD->completeDefinition();
4362     RD->addAttr(PackedAttr::CreateImplicit(C));
4363     TgtOffloadEntryQTy = C.getRecordType(RD);
4364   }
4365   return TgtOffloadEntryQTy;
4366 }
4367 
4368 namespace {
4369 struct PrivateHelpersTy {
4370   PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
4371                    const VarDecl *PrivateElemInit)
4372       : Original(Original), PrivateCopy(PrivateCopy),
4373         PrivateElemInit(PrivateElemInit) {}
4374   const VarDecl *Original;
4375   const VarDecl *PrivateCopy;
4376   const VarDecl *PrivateElemInit;
4377 };
4378 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
4379 } // anonymous namespace
4380 
4381 static RecordDecl *
4382 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
4383   if (!Privates.empty()) {
4384     ASTContext &C = CGM.getContext();
4385     // Build struct .kmp_privates_t. {
4386     //         /*  private vars  */
4387     //       };
4388     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
4389     RD->startDefinition();
4390     for (const auto &Pair : Privates) {
4391       const VarDecl *VD = Pair.second.Original;
4392       QualType Type = VD->getType().getNonReferenceType();
4393       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
4394       if (VD->hasAttrs()) {
4395         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
4396              E(VD->getAttrs().end());
4397              I != E; ++I)
4398           FD->addAttr(*I);
4399       }
4400     }
4401     RD->completeDefinition();
4402     return RD;
4403   }
4404   return nullptr;
4405 }
4406 
4407 static RecordDecl *
4408 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
4409                          QualType KmpInt32Ty,
4410                          QualType KmpRoutineEntryPointerQTy) {
4411   ASTContext &C = CGM.getContext();
4412   // Build struct kmp_task_t {
4413   //         void *              shareds;
4414   //         kmp_routine_entry_t routine;
4415   //         kmp_int32           part_id;
4416   //         kmp_cmplrdata_t data1;
4417   //         kmp_cmplrdata_t data2;
4418   // For taskloops additional fields:
4419   //         kmp_uint64          lb;
4420   //         kmp_uint64          ub;
4421   //         kmp_int64           st;
4422   //         kmp_int32           liter;
4423   //         void *              reductions;
4424   //       };
4425   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
4426   UD->startDefinition();
4427   addFieldToRecordDecl(C, UD, KmpInt32Ty);
4428   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
4429   UD->completeDefinition();
4430   QualType KmpCmplrdataTy = C.getRecordType(UD);
4431   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
4432   RD->startDefinition();
4433   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4434   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
4435   addFieldToRecordDecl(C, RD, KmpInt32Ty);
4436   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4437   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4438   if (isOpenMPTaskLoopDirective(Kind)) {
4439     QualType KmpUInt64Ty =
4440         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
4441     QualType KmpInt64Ty =
4442         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
4443     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4444     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4445     addFieldToRecordDecl(C, RD, KmpInt64Ty);
4446     addFieldToRecordDecl(C, RD, KmpInt32Ty);
4447     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4448   }
4449   RD->completeDefinition();
4450   return RD;
4451 }
4452 
4453 static RecordDecl *
4454 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
4455                                      ArrayRef<PrivateDataTy> Privates) {
4456   ASTContext &C = CGM.getContext();
4457   // Build struct kmp_task_t_with_privates {
4458   //         kmp_task_t task_data;
4459   //         .kmp_privates_t. privates;
4460   //       };
4461   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
4462   RD->startDefinition();
4463   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
4464   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
4465     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
4466   RD->completeDefinition();
4467   return RD;
4468 }
4469 
/// Emit a proxy function which accepts a pointer to kmp_task_t_with_privates
/// as the second argument, unpacks it and forwards to \p TaskFunction.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t_with_privates *tt) {
///   TaskFunction(gtid, &tt->task_data.part_id, &tt->privates,
///   task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->task_data.reductions,
///   For all directives:
///   tt->task_data.shareds);
///   return 0;
/// }
/// \endcode
/// A null pointer is passed instead of &tt->privates when the record has no
/// privates field. The emitted function has internal linkage and is marked
/// as non-recursive.
///
/// \returns the emitted proxy function.
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Signature: kmp_int32 (kmp_int32 gtid, kmp_task_t_with_privates *restrict).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, &tt->task_data.part_id, &tt->privates,
  // task_privates_map, tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.reductions,
  // For all directives:
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // TDBase is the lvalue for *tt (the whole kmp_task_t_with_privates).
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Base is the lvalue for the first field, i.e. tt->task_data.
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address, not by value.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  // The shareds pointer is loaded and cast to the caller-provided pointer
  // type.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates field is the optional second field of the wrapper record;
  // pass its address as void*, or null if the record has no such field.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  // Arguments common to all directives; the last one is tt itself as void*.
  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloops additionally pass the loaded lower bound, upper bound,
    // stride, last-iteration flag and reductions pointer.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  // The shareds pointer always comes last.
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The proxy always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
4584 
4585 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
4586                                             SourceLocation Loc,
4587                                             QualType KmpInt32Ty,
4588                                             QualType KmpTaskTWithPrivatesPtrQTy,
4589                                             QualType KmpTaskTWithPrivatesQTy) {
4590   ASTContext &C = CGM.getContext();
4591   FunctionArgList Args;
4592   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
4593                             ImplicitParamDecl::Other);
4594   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4595                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
4596                                 ImplicitParamDecl::Other);
4597   Args.push_back(&GtidArg);
4598   Args.push_back(&TaskTypeArg);
4599   const auto &DestructorFnInfo =
4600       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
4601   llvm::FunctionType *DestructorFnTy =
4602       CGM.getTypes().GetFunctionType(DestructorFnInfo);
4603   std::string Name =
4604       CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
4605   auto *DestructorFn =
4606       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
4607                              Name, &CGM.getModule());
4608   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
4609                                     DestructorFnInfo);
4610   DestructorFn->setDoesNotRecurse();
4611   CodeGenFunction CGF(CGM);
4612   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
4613                     Args, Loc, Loc);
4614 
4615   LValue Base = CGF.EmitLoadOfPointerLValue(
4616       CGF.GetAddrOfLocalVar(&TaskTypeArg),
4617       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4618   const auto *KmpTaskTWithPrivatesQTyRD =
4619       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
4620   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4621   Base = CGF.EmitLValueForField(Base, *FI);
4622   for (const auto *Field :
4623        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
4624     if (QualType::DestructionKind DtorKind =
4625             Field->getType().isDestructedType()) {
4626       LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
4627       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
4628     }
4629   }
4630   CGF.FinishFunction();
4631   return DestructorFn;
4632 }
4633 
4634 /// Emit a privates mapping function for correct handling of private and
4635 /// firstprivate variables.
4636 /// \code
4637 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
4638 /// **noalias priv1,...,  <tyn> **noalias privn) {
4639 ///   *priv1 = &.privates.priv1;
4640 ///   ...;
4641 ///   *privn = &.privates.privn;
4642 /// }
4643 /// \endcode
4644 static llvm::Value *
4645 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
4646                                ArrayRef<const Expr *> PrivateVars,
4647                                ArrayRef<const Expr *> FirstprivateVars,
4648                                ArrayRef<const Expr *> LastprivateVars,
4649                                QualType PrivatesQTy,
4650                                ArrayRef<PrivateDataTy> Privates) {
4651   ASTContext &C = CGM.getContext();
4652   FunctionArgList Args;
4653   ImplicitParamDecl TaskPrivatesArg(
4654       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4655       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
4656       ImplicitParamDecl::Other);
4657   Args.push_back(&TaskPrivatesArg);
4658   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
4659   unsigned Counter = 1;
4660   for (const Expr *E : PrivateVars) {
4661     Args.push_back(ImplicitParamDecl::Create(
4662         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4663         C.getPointerType(C.getPointerType(E->getType()))
4664             .withConst()
4665             .withRestrict(),
4666         ImplicitParamDecl::Other));
4667     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4668     PrivateVarsPos[VD] = Counter;
4669     ++Counter;
4670   }
4671   for (const Expr *E : FirstprivateVars) {
4672     Args.push_back(ImplicitParamDecl::Create(
4673         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4674         C.getPointerType(C.getPointerType(E->getType()))
4675             .withConst()
4676             .withRestrict(),
4677         ImplicitParamDecl::Other));
4678     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4679     PrivateVarsPos[VD] = Counter;
4680     ++Counter;
4681   }
4682   for (const Expr *E : LastprivateVars) {
4683     Args.push_back(ImplicitParamDecl::Create(
4684         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4685         C.getPointerType(C.getPointerType(E->getType()))
4686             .withConst()
4687             .withRestrict(),
4688         ImplicitParamDecl::Other));
4689     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4690     PrivateVarsPos[VD] = Counter;
4691     ++Counter;
4692   }
4693   const auto &TaskPrivatesMapFnInfo =
4694       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4695   llvm::FunctionType *TaskPrivatesMapTy =
4696       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
4697   std::string Name =
4698       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
4699   auto *TaskPrivatesMap = llvm::Function::Create(
4700       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
4701       &CGM.getModule());
4702   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
4703                                     TaskPrivatesMapFnInfo);
4704   if (CGM.getLangOpts().Optimize) {
4705     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
4706     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
4707     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
4708   }
4709   CodeGenFunction CGF(CGM);
4710   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
4711                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
4712 
4713   // *privi = &.privates.privi;
4714   LValue Base = CGF.EmitLoadOfPointerLValue(
4715       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
4716       TaskPrivatesArg.getType()->castAs<PointerType>());
4717   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
4718   Counter = 0;
4719   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
4720     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
4721     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
4722     LValue RefLVal =
4723         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
4724     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
4725         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
4726     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
4727     ++Counter;
4728   }
4729   CGF.FinishFunction();
4730   return TaskPrivatesMap;
4731 }
4732 
/// Emit initialization for private variables in task-based directives.
///
/// Walks \p Privates in lockstep with the fields of the privates record (the
/// second field of \p KmpTaskTWithPrivatesQTyRD) and emits the initializer of
/// each private copy into the corresponding field reached through \p TDBase.
///
/// \param KmpTaskSharedsPtr Address of the shareds block that firstprivate
/// copies are initialized from. It is only used when the condition guarding
/// the creation of SrcBase below holds.
/// \param TDBase LValue of the task-with-privates record being initialized.
/// \param ForDup When true, only private copies whose initializer is a
/// non-trivial CXXConstructExpr are emitted; when false, every private copy
/// that has an initializer is emitted.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates record is the second field of the task-with-privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  // The captured statement is looked up under 'taskloop' for taskloop
  // directives and under 'task' for every other directive.
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    // View the shareds block as an lvalue of type SharedsTy.
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // From here on FI iterates over the fields of the privates record itself.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // Without ForDup any initializer is emitted; with ForDup only non-trivial
    // constructor initializers are.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // A non-null PrivateElemInit means the copy is initialized from the
      // original (shared) variable.
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          // Not captured: use the address of the local variable directly.
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else {
          // Captured: take the field of the shareds record and rebuild the
          // lvalue with the declared alignment of the original variable.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  // Make Elem refer to the current source element while the
                  // initializer is emitted.
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array: make Elem refer to the shared original and emit the
          // initializer directly into the private field.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // No element initializer: emit the private copy's own initializer.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    // Advance to the next privates-record field even when nothing was emitted.
    ++FI;
  }
}
4839 
4840 /// Check if duplication function is required for taskloops.
4841 static bool checkInitIsRequired(CodeGenFunction &CGF,
4842                                 ArrayRef<PrivateDataTy> Privates) {
4843   bool InitRequired = false;
4844   for (const PrivateDataTy &Pair : Privates) {
4845     const VarDecl *VD = Pair.second.PrivateCopy;
4846     const Expr *Init = VD->getAnyInitializer();
4847     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
4848                                     !CGF.isTrivialInitializer(Init));
4849     if (InitRequired)
4850       break;
4851   }
4852   return InitRequired;
4853 }
4854 
4855 
4856 /// Emit task_dup function (for initialization of
4857 /// private/firstprivate/lastprivate vars and last_iter flag)
4858 /// \code
4859 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
4860 /// lastpriv) {
4861 /// // setup lastprivate flag
4862 ///    task_dst->last = lastpriv;
4863 /// // could be constructor calls here...
4864 /// }
4865 /// \endcode
4866 static llvm::Value *
4867 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
4868                     const OMPExecutableDirective &D,
4869                     QualType KmpTaskTWithPrivatesPtrQTy,
4870                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4871                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
4872                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
4873                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
4874   ASTContext &C = CGM.getContext();
4875   FunctionArgList Args;
4876   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4877                            KmpTaskTWithPrivatesPtrQTy,
4878                            ImplicitParamDecl::Other);
4879   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4880                            KmpTaskTWithPrivatesPtrQTy,
4881                            ImplicitParamDecl::Other);
4882   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
4883                                 ImplicitParamDecl::Other);
4884   Args.push_back(&DstArg);
4885   Args.push_back(&SrcArg);
4886   Args.push_back(&LastprivArg);
4887   const auto &TaskDupFnInfo =
4888       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4889   llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
4890   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
4891   auto *TaskDup = llvm::Function::Create(
4892       TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4893   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
4894   TaskDup->setDoesNotRecurse();
4895   CodeGenFunction CGF(CGM);
4896   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
4897                     Loc);
4898 
4899   LValue TDBase = CGF.EmitLoadOfPointerLValue(
4900       CGF.GetAddrOfLocalVar(&DstArg),
4901       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4902   // task_dst->liter = lastpriv;
4903   if (WithLastIter) {
4904     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4905     LValue Base = CGF.EmitLValueForField(
4906         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4907     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4908     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
4909         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
4910     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
4911   }
4912 
4913   // Emit initial values for private copies (if any).
4914   assert(!Privates.empty());
4915   Address KmpTaskSharedsPtr = Address::invalid();
4916   if (!Data.FirstprivateVars.empty()) {
4917     LValue TDBase = CGF.EmitLoadOfPointerLValue(
4918         CGF.GetAddrOfLocalVar(&SrcArg),
4919         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4920     LValue Base = CGF.EmitLValueForField(
4921         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4922     KmpTaskSharedsPtr = Address(
4923         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
4924                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
4925                                                   KmpTaskTShareds)),
4926                              Loc),
4927         CGF.getNaturalTypeAlignment(SharedsTy));
4928   }
4929   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
4930                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
4931   CGF.FinishFunction();
4932   return TaskDup;
4933 }
4934 
4935 /// Checks if destructor function is required to be generated.
4936 /// \return true if cleanups are required, false otherwise.
4937 static bool
4938 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
4939   bool NeedsCleanup = false;
4940   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4941   const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
4942   for (const FieldDecl *FD : PrivateRD->fields()) {
4943     NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
4944     if (NeedsCleanup)
4945       break;
4946   }
4947   return NeedsCleanup;
4948 }
4949 
/// Emit the code that allocates and initializes a task object for the
/// directive \p D.
///
/// The steps are: collect the private/firstprivate/lastprivate copies, build
/// the kmp_task_t and task-with-privates record types, emit the privates
/// mapping function and the proxy task entry, call the runtime task
/// allocation function, copy the shareds, initialize the private copies,
/// optionally emit the task duplication function for taskloops, and store
/// the destructor function pointer and priority into the task record.
///
/// \param TaskFunction Outlined task function; the type of its fourth
/// parameter is used as the type of the privates mapping function pointer.
/// \param SharedsTy Record type of the shareds block.
/// \param Shareds Address of the shareds that are copied into the new task.
/// \param Data Clause-derived data (privates, tied/final/priority, ...).
/// \return A TaskResultTy with NewTask, TaskEntry, NewTaskNewTaskTTy, TDBase
/// and KmpTaskTQTyRD filled in. TaskDupFn is assigned only for taskloop
/// directives with privates that have lastprivates or need non-trivial
/// constructor initialization.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  // Each entry pairs the declared alignment of the original variable with its
  // helper declarations (original, private copy, element initializer).
  auto I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates are the only kind that carries an element initializer.
  I = Data.FirstprivateCopies.begin();
  auto IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Largest alignment first; the stable sort keeps the relative order of
  // entries with equal alignment.
  llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
    return L.first > R.first;
  });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  // Taskloop and non-taskloop directives use separately cached record types.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The mapping function pointer type is taken from the fourth parameter of
  // the outlined task function.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    // The privates record is the second field of the task-with-privates
    // record.
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    // No privates: pass a null mapping function.
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20
  };
  // Flags known at compile time are collected in Flags.
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  // The final flag is either selected at run time from the value held in
  // Data.Final's pointer or folded from its integer part.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  // With a nowait clause the target task allocation entry point is used; it
  // takes the device ID as an extra trailing argument.
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    // Note: this 'C' is the device clause and shadows the ASTContext 'C'
    // inside the condition's scope.
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_target_task_alloc), AllocArgs);
  } else {
    NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  }
  // View the returned task pointer as a pointer to the task-with-privates
  // record; TDBase is its first field.
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    // Load the shareds pointer stored in the task and copy Shareds to it.
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops get a duplication function when there are lastprivates or
    // when some private copy needs non-trivial constructor initialization.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  // NOTE(review): the union declaration is taken from the Data1 field and is
  // also used below to index into the Data2 field; this presumably relies on
  // both fields having the same union type - confirm against
  // createKmpTaskTRecordDecl.
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  // Hand everything the callers need back in the result record.
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
5169 
5170 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
5171                                    const OMPExecutableDirective &D,
5172                                    llvm::Function *TaskFunction,
5173                                    QualType SharedsTy, Address Shareds,
5174                                    const Expr *IfCond,
5175                                    const OMPTaskDataTy &Data) {
5176   if (!CGF.HaveInsertPoint())
5177     return;
5178 
5179   TaskResultTy Result =
5180       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5181   llvm::Value *NewTask = Result.NewTask;
5182   llvm::Function *TaskEntry = Result.TaskEntry;
5183   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
5184   LValue TDBase = Result.TDBase;
5185   const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
5186   ASTContext &C = CGM.getContext();
5187   // Process list of dependences.
5188   Address DependenciesArray = Address::invalid();
5189   unsigned NumDependencies = Data.Dependences.size();
5190   if (NumDependencies) {
5191     // Dependence kind for RTL.
5192     enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3, DepMutexInOutSet = 0x4 };
5193     enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
5194     RecordDecl *KmpDependInfoRD;
5195     QualType FlagsTy =
5196         C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
5197     llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5198     if (KmpDependInfoTy.isNull()) {
5199       KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
5200       KmpDependInfoRD->startDefinition();
5201       addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
5202       addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
5203       addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
5204       KmpDependInfoRD->completeDefinition();
5205       KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
5206     } else {
5207       KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5208     }
5209     // Define type kmp_depend_info[<Dependences.size()>];
5210     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
5211         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
5212         nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
5213     // kmp_depend_info[<Dependences.size()>] deps;
5214     DependenciesArray =
5215         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
5216     for (unsigned I = 0; I < NumDependencies; ++I) {
5217       const Expr *E = Data.Dependences[I].second;
5218       LValue Addr = CGF.EmitLValue(E);
5219       llvm::Value *Size;
5220       QualType Ty = E->getType();
5221       if (const auto *ASE =
5222               dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
5223         LValue UpAddrLVal =
5224             CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
5225         llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
5226             UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
5227         llvm::Value *LowIntPtr =
5228             CGF.Builder.CreatePtrToInt(Addr.getPointer(CGF), CGM.SizeTy);
5229         llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
5230         Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
5231       } else {
5232         Size = CGF.getTypeSize(Ty);
5233       }
5234       LValue Base = CGF.MakeAddrLValue(
5235           CGF.Builder.CreateConstArrayGEP(DependenciesArray, I),
5236           KmpDependInfoTy);
5237       // deps[i].base_addr = &<Dependences[i].second>;
5238       LValue BaseAddrLVal = CGF.EmitLValueForField(
5239           Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
5240       CGF.EmitStoreOfScalar(
5241           CGF.Builder.CreatePtrToInt(Addr.getPointer(CGF), CGF.IntPtrTy),
5242           BaseAddrLVal);
5243       // deps[i].len = sizeof(<Dependences[i].second>);
5244       LValue LenLVal = CGF.EmitLValueForField(
5245           Base, *std::next(KmpDependInfoRD->field_begin(), Len));
5246       CGF.EmitStoreOfScalar(Size, LenLVal);
5247       // deps[i].flags = <Dependences[i].first>;
5248       RTLDependenceKindTy DepKind;
5249       switch (Data.Dependences[I].first) {
5250       case OMPC_DEPEND_in:
5251         DepKind = DepIn;
5252         break;
5253       // Out and InOut dependencies must use the same code.
5254       case OMPC_DEPEND_out:
5255       case OMPC_DEPEND_inout:
5256         DepKind = DepInOut;
5257         break;
5258       case OMPC_DEPEND_mutexinoutset:
5259         DepKind = DepMutexInOutSet;
5260         break;
5261       case OMPC_DEPEND_source:
5262       case OMPC_DEPEND_sink:
5263       case OMPC_DEPEND_unknown:
5264         llvm_unreachable("Unknown task dependence type");
5265       }
5266       LValue FlagsLVal = CGF.EmitLValueForField(
5267           Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5268       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5269                             FlagsLVal);
5270     }
5271     DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5272         CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), CGF.VoidPtrTy);
5273   }
5274 
5275   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5276   // libcall.
5277   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5278   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5279   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
5280   // list is not empty
5281   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5282   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5283   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5284   llvm::Value *DepTaskArgs[7];
5285   if (NumDependencies) {
5286     DepTaskArgs[0] = UpLoc;
5287     DepTaskArgs[1] = ThreadID;
5288     DepTaskArgs[2] = NewTask;
5289     DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
5290     DepTaskArgs[4] = DependenciesArray.getPointer();
5291     DepTaskArgs[5] = CGF.Builder.getInt32(0);
5292     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5293   }
5294   auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
5295                         &TaskArgs,
5296                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5297     if (!Data.Tied) {
5298       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5299       LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5300       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5301     }
5302     if (NumDependencies) {
5303       CGF.EmitRuntimeCall(
5304           createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
5305     } else {
5306       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
5307                           TaskArgs);
5308     }
5309     // Check if parent region is untied and build return for untied task;
5310     if (auto *Region =
5311             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5312       Region->emitUntiedSwitch(CGF);
5313   };
5314 
5315   llvm::Value *DepWaitTaskArgs[6];
5316   if (NumDependencies) {
5317     DepWaitTaskArgs[0] = UpLoc;
5318     DepWaitTaskArgs[1] = ThreadID;
5319     DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
5320     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5321     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5322     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5323   }
5324   auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
5325                         NumDependencies, &DepWaitTaskArgs,
5326                         Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5327     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5328     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5329     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5330     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5331     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5332     // is specified.
5333     if (NumDependencies)
5334       CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
5335                           DepWaitTaskArgs);
5336     // Call proxy_task_entry(gtid, new_task);
5337     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
5338                       Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
5339       Action.Enter(CGF);
5340       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
5341       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
5342                                                           OutlinedFnArgs);
5343     };
5344 
5345     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
5346     // kmp_task_t *new_task);
5347     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
5348     // kmp_task_t *new_task);
5349     RegionCodeGenTy RCG(CodeGen);
5350     CommonActionTy Action(
5351         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
5352         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
5353     RCG.setAction(Action);
5354     RCG(CGF);
5355   };
5356 
5357   if (IfCond) {
5358     emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
5359   } else {
5360     RegionCodeGenTy ThenRCG(ThenCodeGen);
5361     ThenRCG(CGF);
5362   }
5363 }
5364 
5365 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
5366                                        const OMPLoopDirective &D,
5367                                        llvm::Function *TaskFunction,
5368                                        QualType SharedsTy, Address Shareds,
5369                                        const Expr *IfCond,
5370                                        const OMPTaskDataTy &Data) {
5371   if (!CGF.HaveInsertPoint())
5372     return;
5373   TaskResultTy Result =
5374       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5375   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5376   // libcall.
5377   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
5378   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
5379   // sched, kmp_uint64 grainsize, void *task_dup);
5380   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5381   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5382   llvm::Value *IfVal;
5383   if (IfCond) {
5384     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
5385                                       /*isSigned=*/true);
5386   } else {
5387     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
5388   }
5389 
5390   LValue LBLVal = CGF.EmitLValueForField(
5391       Result.TDBase,
5392       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
5393   const auto *LBVar =
5394       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
5395   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
5396                        LBLVal.getQuals(),
5397                        /*IsInitializer=*/true);
5398   LValue UBLVal = CGF.EmitLValueForField(
5399       Result.TDBase,
5400       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
5401   const auto *UBVar =
5402       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
5403   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
5404                        UBLVal.getQuals(),
5405                        /*IsInitializer=*/true);
5406   LValue StLVal = CGF.EmitLValueForField(
5407       Result.TDBase,
5408       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
5409   const auto *StVar =
5410       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
5411   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
5412                        StLVal.getQuals(),
5413                        /*IsInitializer=*/true);
5414   // Store reductions address.
5415   LValue RedLVal = CGF.EmitLValueForField(
5416       Result.TDBase,
5417       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5418   if (Data.Reductions) {
5419     CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5420   } else {
5421     CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
5422                                CGF.getContext().VoidPtrTy);
5423   }
5424   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
5425   llvm::Value *TaskArgs[] = {
5426       UpLoc,
5427       ThreadID,
5428       Result.NewTask,
5429       IfVal,
5430       LBLVal.getPointer(CGF),
5431       UBLVal.getPointer(CGF),
5432       CGF.EmitLoadOfScalar(StLVal, Loc),
5433       llvm::ConstantInt::getSigned(
5434           CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
5435       llvm::ConstantInt::getSigned(
5436           CGF.IntTy, Data.Schedule.getPointer()
5437                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
5438                          : NoSchedule),
5439       Data.Schedule.getPointer()
5440           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
5441                                       /*isSigned=*/false)
5442           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
5443       Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5444                              Result.TaskDupFn, CGF.VoidPtrTy)
5445                        : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
5446   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
5447 }
5448 
5449 /// Emit reduction operation for each element of array (required for
5450 /// array sections) LHS op = RHS.
5451 /// \param Type Type of array.
5452 /// \param LHSVar Variable on the left side of the reduction operation
5453 /// (references element of array in original variable).
5454 /// \param RHSVar Variable on the right side of the reduction operation
5455 /// (references element of array in original variable).
5456 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5457 /// RHSVar.
5458 static void EmitOMPAggregateReduction(
5459     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
5460     const VarDecl *RHSVar,
5461     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
5462                                   const Expr *, const Expr *)> &RedOpGen,
5463     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
5464     const Expr *UpExpr = nullptr) {
5465   // Perform element-by-element initialization.
5466   QualType ElementTy;
5467   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
5468   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
5469 
5470   // Drill down to the base element type on both arrays.
5471   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
5472   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
5473 
5474   llvm::Value *RHSBegin = RHSAddr.getPointer();
5475   llvm::Value *LHSBegin = LHSAddr.getPointer();
5476   // Cast from pointer to array type to pointer to single element.
5477   llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
5478   // The basic structure here is a while-do loop.
5479   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
5480   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
5481   llvm::Value *IsEmpty =
5482       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
5483   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5484 
5485   // Enter the loop body, making that address the current address.
5486   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5487   CGF.EmitBlock(BodyBB);
5488 
5489   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
5490 
5491   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
5492       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
5493   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
5494   Address RHSElementCurrent =
5495       Address(RHSElementPHI,
5496               RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5497 
5498   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
5499       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
5500   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
5501   Address LHSElementCurrent =
5502       Address(LHSElementPHI,
5503               LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5504 
5505   // Emit copy.
5506   CodeGenFunction::OMPPrivateScope Scope(CGF);
5507   Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
5508   Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
5509   Scope.Privatize();
5510   RedOpGen(CGF, XExpr, EExpr, UpExpr);
5511   Scope.ForceCleanup();
5512 
5513   // Shift the address forward by one element.
5514   llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
5515       LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
5516   llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
5517       RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
5518   // Check whether we've reached the end.
5519   llvm::Value *Done =
5520       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
5521   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
5522   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
5523   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
5524 
5525   // Done.
5526   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5527 }
5528 
5529 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5530 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5531 /// UDR combiner function.
5532 static void emitReductionCombiner(CodeGenFunction &CGF,
5533                                   const Expr *ReductionOp) {
5534   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5535     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5536       if (const auto *DRE =
5537               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5538         if (const auto *DRD =
5539                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5540           std::pair<llvm::Function *, llvm::Function *> Reduction =
5541               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5542           RValue Func = RValue::get(Reduction.first);
5543           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5544           CGF.EmitIgnoredExpr(ReductionOp);
5545           return;
5546         }
5547   CGF.EmitIgnoredExpr(ReductionOp);
5548 }
5549 
5550 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
5551     SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
5552     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
5553     ArrayRef<const Expr *> ReductionOps) {
5554   ASTContext &C = CGM.getContext();
5555 
5556   // void reduction_func(void *LHSArg, void *RHSArg);
5557   FunctionArgList Args;
5558   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5559                            ImplicitParamDecl::Other);
5560   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5561                            ImplicitParamDecl::Other);
5562   Args.push_back(&LHSArg);
5563   Args.push_back(&RHSArg);
5564   const auto &CGFI =
5565       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5566   std::string Name = getName({"omp", "reduction", "reduction_func"});
5567   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
5568                                     llvm::GlobalValue::InternalLinkage, Name,
5569                                     &CGM.getModule());
5570   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
5571   Fn->setDoesNotRecurse();
5572   CodeGenFunction CGF(CGM);
5573   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5574 
5575   // Dst = (void*[n])(LHSArg);
5576   // Src = (void*[n])(RHSArg);
5577   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5578       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
5579       ArgsType), CGF.getPointerAlign());
5580   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5581       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
5582       ArgsType), CGF.getPointerAlign());
5583 
5584   //  ...
5585   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5586   //  ...
5587   CodeGenFunction::OMPPrivateScope Scope(CGF);
5588   auto IPriv = Privates.begin();
5589   unsigned Idx = 0;
5590   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5591     const auto *RHSVar =
5592         cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5593     Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
5594       return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
5595     });
5596     const auto *LHSVar =
5597         cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5598     Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
5599       return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
5600     });
5601     QualType PrivTy = (*IPriv)->getType();
5602     if (PrivTy->isVariablyModifiedType()) {
5603       // Get array size and emit VLA type.
5604       ++Idx;
5605       Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5606       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5607       const VariableArrayType *VLA =
5608           CGF.getContext().getAsVariableArrayType(PrivTy);
5609       const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5610       CodeGenFunction::OpaqueValueMapping OpaqueMap(
5611           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5612       CGF.EmitVariablyModifiedType(PrivTy);
5613     }
5614   }
5615   Scope.Privatize();
5616   IPriv = Privates.begin();
5617   auto ILHS = LHSExprs.begin();
5618   auto IRHS = RHSExprs.begin();
5619   for (const Expr *E : ReductionOps) {
5620     if ((*IPriv)->getType()->isArrayType()) {
5621       // Emit reduction for array section.
5622       const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5623       const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5624       EmitOMPAggregateReduction(
5625           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5626           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5627             emitReductionCombiner(CGF, E);
5628           });
5629     } else {
5630       // Emit reduction for array subscript or single variable.
5631       emitReductionCombiner(CGF, E);
5632     }
5633     ++IPriv;
5634     ++ILHS;
5635     ++IRHS;
5636   }
5637   Scope.ForceCleanup();
5638   CGF.FinishFunction();
5639   return Fn;
5640 }
5641 
5642 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5643                                                   const Expr *ReductionOp,
5644                                                   const Expr *PrivateRef,
5645                                                   const DeclRefExpr *LHS,
5646                                                   const DeclRefExpr *RHS) {
5647   if (PrivateRef->getType()->isArrayType()) {
5648     // Emit reduction for array section.
5649     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5650     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5651     EmitOMPAggregateReduction(
5652         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5653         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5654           emitReductionCombiner(CGF, ReductionOp);
5655         });
5656   } else {
5657     // Emit reduction for array subscript or single variable.
5658     emitReductionCombiner(CGF, ReductionOp);
5659   }
5660 }
5661 
5662 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5663                                     ArrayRef<const Expr *> Privates,
5664                                     ArrayRef<const Expr *> LHSExprs,
5665                                     ArrayRef<const Expr *> RHSExprs,
5666                                     ArrayRef<const Expr *> ReductionOps,
5667                                     ReductionOptionsTy Options) {
5668   if (!CGF.HaveInsertPoint())
5669     return;
5670 
5671   bool WithNowait = Options.WithNowait;
5672   bool SimpleReduction = Options.SimpleReduction;
5673 
5674   // Next code should be emitted for reduction:
5675   //
5676   // static kmp_critical_name lock = { 0 };
5677   //
5678   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5679   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5680   //  ...
5681   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5682   //  *(Type<n>-1*)rhs[<n>-1]);
5683   // }
5684   //
5685   // ...
5686   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5687   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5688   // RedList, reduce_func, &<lock>)) {
5689   // case 1:
5690   //  ...
5691   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5692   //  ...
5693   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5694   // break;
5695   // case 2:
5696   //  ...
5697   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5698   //  ...
5699   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5700   // break;
5701   // default:;
5702   // }
5703   //
5704   // if SimpleReduction is true, only the next code is generated:
5705   //  ...
5706   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5707   //  ...
5708 
5709   ASTContext &C = CGM.getContext();
5710 
5711   if (SimpleReduction) {
5712     CodeGenFunction::RunCleanupsScope Scope(CGF);
5713     auto IPriv = Privates.begin();
5714     auto ILHS = LHSExprs.begin();
5715     auto IRHS = RHSExprs.begin();
5716     for (const Expr *E : ReductionOps) {
5717       emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5718                                   cast<DeclRefExpr>(*IRHS));
5719       ++IPriv;
5720       ++ILHS;
5721       ++IRHS;
5722     }
5723     return;
5724   }
5725 
5726   // 1. Build a list of reduction variables.
5727   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5728   auto Size = RHSExprs.size();
5729   for (const Expr *E : Privates) {
5730     if (E->getType()->isVariablyModifiedType())
5731       // Reserve place for array size.
5732       ++Size;
5733   }
5734   llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5735   QualType ReductionArrayTy =
5736       C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
5737                              /*IndexTypeQuals=*/0);
5738   Address ReductionList =
5739       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5740   auto IPriv = Privates.begin();
5741   unsigned Idx = 0;
5742   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5743     Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5744     CGF.Builder.CreateStore(
5745         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5746             CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5747         Elem);
5748     if ((*IPriv)->getType()->isVariablyModifiedType()) {
5749       // Store array size.
5750       ++Idx;
5751       Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5752       llvm::Value *Size = CGF.Builder.CreateIntCast(
5753           CGF.getVLASize(
5754                  CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5755               .NumElts,
5756           CGF.SizeTy, /*isSigned=*/false);
5757       CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5758                               Elem);
5759     }
5760   }
5761 
5762   // 2. Emit reduce_func().
5763   llvm::Function *ReductionFn = emitReductionFunction(
5764       Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
5765       LHSExprs, RHSExprs, ReductionOps);
5766 
5767   // 3. Create static kmp_critical_name lock = { 0 };
5768   std::string Name = getName({"reduction"});
5769   llvm::Value *Lock = getCriticalRegionLock(Name);
5770 
5771   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5772   // RedList, reduce_func, &<lock>);
5773   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5774   llvm::Value *ThreadId = getThreadID(CGF, Loc);
5775   llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5776   llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5777       ReductionList.getPointer(), CGF.VoidPtrTy);
5778   llvm::Value *Args[] = {
5779       IdentTLoc,                             // ident_t *<loc>
5780       ThreadId,                              // i32 <gtid>
5781       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5782       ReductionArrayTySize,                  // size_type sizeof(RedList)
5783       RL,                                    // void *RedList
5784       ReductionFn, // void (*) (void *, void *) <reduce_func>
5785       Lock         // kmp_critical_name *&<lock>
5786   };
5787   llvm::Value *Res = CGF.EmitRuntimeCall(
5788       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
5789                                        : OMPRTL__kmpc_reduce),
5790       Args);
5791 
5792   // 5. Build switch(res)
5793   llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5794   llvm::SwitchInst *SwInst =
5795       CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5796 
5797   // 6. Build case 1:
5798   //  ...
5799   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5800   //  ...
5801   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5802   // break;
5803   llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5804   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5805   CGF.EmitBlock(Case1BB);
5806 
5807   // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5808   llvm::Value *EndArgs[] = {
5809       IdentTLoc, // ident_t *<loc>
5810       ThreadId,  // i32 <gtid>
5811       Lock       // kmp_critical_name *&<lock>
5812   };
5813   auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5814                        CodeGenFunction &CGF, PrePostActionTy &Action) {
5815     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5816     auto IPriv = Privates.begin();
5817     auto ILHS = LHSExprs.begin();
5818     auto IRHS = RHSExprs.begin();
5819     for (const Expr *E : ReductionOps) {
5820       RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5821                                      cast<DeclRefExpr>(*IRHS));
5822       ++IPriv;
5823       ++ILHS;
5824       ++IRHS;
5825     }
5826   };
5827   RegionCodeGenTy RCG(CodeGen);
5828   CommonActionTy Action(
5829       nullptr, llvm::None,
5830       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
5831                                        : OMPRTL__kmpc_end_reduce),
5832       EndArgs);
5833   RCG.setAction(Action);
5834   RCG(CGF);
5835 
5836   CGF.EmitBranch(DefaultBB);
5837 
5838   // 7. Build case 2:
5839   //  ...
5840   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5841   //  ...
5842   // break;
5843   llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5844   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5845   CGF.EmitBlock(Case2BB);
5846 
5847   auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5848                              CodeGenFunction &CGF, PrePostActionTy &Action) {
5849     auto ILHS = LHSExprs.begin();
5850     auto IRHS = RHSExprs.begin();
5851     auto IPriv = Privates.begin();
5852     for (const Expr *E : ReductionOps) {
5853       const Expr *XExpr = nullptr;
5854       const Expr *EExpr = nullptr;
5855       const Expr *UpExpr = nullptr;
5856       BinaryOperatorKind BO = BO_Comma;
5857       if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5858         if (BO->getOpcode() == BO_Assign) {
5859           XExpr = BO->getLHS();
5860           UpExpr = BO->getRHS();
5861         }
5862       }
5863       // Try to emit update expression as a simple atomic.
5864       const Expr *RHSExpr = UpExpr;
5865       if (RHSExpr) {
5866         // Analyze RHS part of the whole expression.
5867         if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5868                 RHSExpr->IgnoreParenImpCasts())) {
5869           // If this is a conditional operator, analyze its condition for
5870           // min/max reduction operator.
5871           RHSExpr = ACO->getCond();
5872         }
5873         if (const auto *BORHS =
5874                 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5875           EExpr = BORHS->getRHS();
5876           BO = BORHS->getOpcode();
5877         }
5878       }
5879       if (XExpr) {
5880         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5881         auto &&AtomicRedGen = [BO, VD,
5882                                Loc](CodeGenFunction &CGF, const Expr *XExpr,
5883                                     const Expr *EExpr, const Expr *UpExpr) {
5884           LValue X = CGF.EmitLValue(XExpr);
5885           RValue E;
5886           if (EExpr)
5887             E = CGF.EmitAnyExpr(EExpr);
5888           CGF.EmitOMPAtomicSimpleUpdateExpr(
5889               X, E, BO, /*IsXLHSInRHSPart=*/true,
5890               llvm::AtomicOrdering::Monotonic, Loc,
5891               [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5892                 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5893                 PrivateScope.addPrivate(
5894                     VD, [&CGF, VD, XRValue, Loc]() {
5895                       Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5896                       CGF.emitOMPSimpleStore(
5897                           CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5898                           VD->getType().getNonReferenceType(), Loc);
5899                       return LHSTemp;
5900                     });
5901                 (void)PrivateScope.Privatize();
5902                 return CGF.EmitAnyExpr(UpExpr);
5903               });
5904         };
5905         if ((*IPriv)->getType()->isArrayType()) {
5906           // Emit atomic reduction for array section.
5907           const auto *RHSVar =
5908               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5909           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5910                                     AtomicRedGen, XExpr, EExpr, UpExpr);
5911         } else {
5912           // Emit atomic reduction for array subscript or single variable.
5913           AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5914         }
5915       } else {
5916         // Emit as a critical region.
5917         auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5918                                            const Expr *, const Expr *) {
5919           CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5920           std::string Name = RT.getName({"atomic_reduction"});
5921           RT.emitCriticalRegion(
5922               CGF, Name,
5923               [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5924                 Action.Enter(CGF);
5925                 emitReductionCombiner(CGF, E);
5926               },
5927               Loc);
5928         };
5929         if ((*IPriv)->getType()->isArrayType()) {
5930           const auto *LHSVar =
5931               cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5932           const auto *RHSVar =
5933               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5934           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5935                                     CritRedGen);
5936         } else {
5937           CritRedGen(CGF, nullptr, nullptr, nullptr);
5938         }
5939       }
5940       ++ILHS;
5941       ++IRHS;
5942       ++IPriv;
5943     }
5944   };
5945   RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5946   if (!WithNowait) {
5947     // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5948     llvm::Value *EndArgs[] = {
5949         IdentTLoc, // ident_t *<loc>
5950         ThreadId,  // i32 <gtid>
5951         Lock       // kmp_critical_name *&<lock>
5952     };
5953     CommonActionTy Action(nullptr, llvm::None,
5954                           createRuntimeFunction(OMPRTL__kmpc_end_reduce),
5955                           EndArgs);
5956     AtomicRCG.setAction(Action);
5957     AtomicRCG(CGF);
5958   } else {
5959     AtomicRCG(CGF);
5960   }
5961 
5962   CGF.EmitBranch(DefaultBB);
5963   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5964 }
5965 
5966 /// Generates unique name for artificial threadprivate variables.
5967 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5968 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5969                                       const Expr *Ref) {
5970   SmallString<256> Buffer;
5971   llvm::raw_svector_ostream Out(Buffer);
5972   const clang::DeclRefExpr *DE;
5973   const VarDecl *D = ::getBaseDecl(Ref, DE);
5974   if (!D)
5975     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5976   D = D->getCanonicalDecl();
5977   std::string Name = CGM.getOpenMPRuntime().getName(
5978       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5979   Out << Prefix << Name << "_"
5980       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5981   return std::string(Out.str());
5982 }
5983 
5984 /// Emits reduction initializer function:
5985 /// \code
5986 /// void @.red_init(void* %arg) {
5987 /// %0 = bitcast void* %arg to <type>*
5988 /// store <type> <init>, <type>* %0
5989 /// ret void
5990 /// }
5991 /// \endcode
5992 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5993                                            SourceLocation Loc,
5994                                            ReductionCodeGen &RCG, unsigned N) {
5995   ASTContext &C = CGM.getContext();
5996   FunctionArgList Args;
5997   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5998                           ImplicitParamDecl::Other);
5999   Args.emplace_back(&Param);
6000   const auto &FnInfo =
6001       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6002   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6003   std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
6004   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6005                                     Name, &CGM.getModule());
6006   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6007   Fn->setDoesNotRecurse();
6008   CodeGenFunction CGF(CGM);
6009   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6010   Address PrivateAddr = CGF.EmitLoadOfPointer(
6011       CGF.GetAddrOfLocalVar(&Param),
6012       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6013   llvm::Value *Size = nullptr;
6014   // If the size of the reduction item is non-constant, load it from global
6015   // threadprivate variable.
6016   if (RCG.getSizes(N).second) {
6017     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6018         CGF, CGM.getContext().getSizeType(),
6019         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6020     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6021                                 CGM.getContext().getSizeType(), Loc);
6022   }
6023   RCG.emitAggregateType(CGF, N, Size);
6024   LValue SharedLVal;
6025   // If initializer uses initializer from declare reduction construct, emit a
6026   // pointer to the address of the original reduction item (reuired by reduction
6027   // initializer)
6028   if (RCG.usesReductionInitializer(N)) {
6029     Address SharedAddr =
6030         CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6031             CGF, CGM.getContext().VoidPtrTy,
6032             generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
6033     SharedAddr = CGF.EmitLoadOfPointer(
6034         SharedAddr,
6035         CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
6036     SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
6037   } else {
6038     SharedLVal = CGF.MakeNaturalAlignAddrLValue(
6039         llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
6040         CGM.getContext().VoidPtrTy);
6041   }
6042   // Emit the initializer:
6043   // %0 = bitcast void* %arg to <type>*
6044   // store <type> <init>, <type>* %0
6045   RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
6046                          [](CodeGenFunction &) { return false; });
6047   CGF.FinishFunction();
6048   return Fn;
6049 }
6050 
6051 /// Emits reduction combiner function:
6052 /// \code
6053 /// void @.red_comb(void* %arg0, void* %arg1) {
6054 /// %lhs = bitcast void* %arg0 to <type>*
6055 /// %rhs = bitcast void* %arg1 to <type>*
6056 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
6057 /// store <type> %2, <type>* %lhs
6058 /// ret void
6059 /// }
6060 /// \endcode
6061 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
6062                                            SourceLocation Loc,
6063                                            ReductionCodeGen &RCG, unsigned N,
6064                                            const Expr *ReductionOp,
6065                                            const Expr *LHS, const Expr *RHS,
6066                                            const Expr *PrivateRef) {
6067   ASTContext &C = CGM.getContext();
6068   const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
6069   const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
6070   FunctionArgList Args;
6071   ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
6072                                C.VoidPtrTy, ImplicitParamDecl::Other);
6073   ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6074                             ImplicitParamDecl::Other);
6075   Args.emplace_back(&ParamInOut);
6076   Args.emplace_back(&ParamIn);
6077   const auto &FnInfo =
6078       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6079   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6080   std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
6081   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6082                                     Name, &CGM.getModule());
6083   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6084   Fn->setDoesNotRecurse();
6085   CodeGenFunction CGF(CGM);
6086   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6087   llvm::Value *Size = nullptr;
6088   // If the size of the reduction item is non-constant, load it from global
6089   // threadprivate variable.
6090   if (RCG.getSizes(N).second) {
6091     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6092         CGF, CGM.getContext().getSizeType(),
6093         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6094     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6095                                 CGM.getContext().getSizeType(), Loc);
6096   }
6097   RCG.emitAggregateType(CGF, N, Size);
6098   // Remap lhs and rhs variables to the addresses of the function arguments.
6099   // %lhs = bitcast void* %arg0 to <type>*
6100   // %rhs = bitcast void* %arg1 to <type>*
6101   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6102   PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
6103     // Pull out the pointer to the variable.
6104     Address PtrAddr = CGF.EmitLoadOfPointer(
6105         CGF.GetAddrOfLocalVar(&ParamInOut),
6106         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6107     return CGF.Builder.CreateElementBitCast(
6108         PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
6109   });
6110   PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
6111     // Pull out the pointer to the variable.
6112     Address PtrAddr = CGF.EmitLoadOfPointer(
6113         CGF.GetAddrOfLocalVar(&ParamIn),
6114         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6115     return CGF.Builder.CreateElementBitCast(
6116         PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
6117   });
6118   PrivateScope.Privatize();
6119   // Emit the combiner body:
6120   // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
6121   // store <type> %2, <type>* %lhs
6122   CGM.getOpenMPRuntime().emitSingleReductionCombiner(
6123       CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
6124       cast<DeclRefExpr>(RHS));
6125   CGF.FinishFunction();
6126   return Fn;
6127 }
6128 
6129 /// Emits reduction finalizer function:
6130 /// \code
6131 /// void @.red_fini(void* %arg) {
6132 /// %0 = bitcast void* %arg to <type>*
6133 /// <destroy>(<type>* %0)
6134 /// ret void
6135 /// }
6136 /// \endcode
6137 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
6138                                            SourceLocation Loc,
6139                                            ReductionCodeGen &RCG, unsigned N) {
6140   if (!RCG.needCleanups(N))
6141     return nullptr;
6142   ASTContext &C = CGM.getContext();
6143   FunctionArgList Args;
6144   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6145                           ImplicitParamDecl::Other);
6146   Args.emplace_back(&Param);
6147   const auto &FnInfo =
6148       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6149   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6150   std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
6151   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6152                                     Name, &CGM.getModule());
6153   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6154   Fn->setDoesNotRecurse();
6155   CodeGenFunction CGF(CGM);
6156   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6157   Address PrivateAddr = CGF.EmitLoadOfPointer(
6158       CGF.GetAddrOfLocalVar(&Param),
6159       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6160   llvm::Value *Size = nullptr;
6161   // If the size of the reduction item is non-constant, load it from global
6162   // threadprivate variable.
6163   if (RCG.getSizes(N).second) {
6164     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6165         CGF, CGM.getContext().getSizeType(),
6166         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6167     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6168                                 CGM.getContext().getSizeType(), Loc);
6169   }
6170   RCG.emitAggregateType(CGF, N, Size);
6171   // Emit the finalizer body:
6172   // <destroy>(<type>* %0)
6173   RCG.emitCleanups(CGF, N, PrivateAddr);
6174   CGF.FinishFunction(Loc);
6175   return Fn;
6176 }
6177 
/// Emits the setup of task reductions for the items in
/// \p Data.ReductionVars.
/// A local array of kmp_task_red_input_t descriptors is filled in, one
/// descriptor per reduction item (address of the shared item, its size, the
/// generated init/fini/comb routines and the flags), and is then passed to
/// __kmpc_task_reduction_init.
/// \returns the result of the runtime call, or nullptr if \p CGF has no insert
/// point or there are no reduction items.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_task_red_input {
  //   void *reduce_shar; // shared reduction item
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_task_red_input_t;
  // The fields are added in exactly this order.
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  // The flags field is a 32-bit unsigned integer.
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  // One descriptor per reduction item.
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
                       Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    // The shared lvalue is emitted first and then read back from RCG.
    RCG.emitSharedLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // The sizes are queried from RCG only after the aggregate type was
    // emitted.
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs, array sections and
    // custom reduction initializations. It is required because runtime does not
    // provide the way to pass the sizes of VLAs/array sections to
    // initializer/combiner/finalizer functions and does not pass the pointer to
    // original reduction item to the initializer. Instead threadprivate global
    // variables are used to store these values and use them in the functions.
    // A non-null SizeVal requests delayed creation.
    bool DelayedCreation = !!SizeVal;
    // ElemLVal.reduce_size = (size_t)SizeValInChars;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // A custom reduction initializer also requests delayed creation.
    DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
    // ElemLVal.reduce_fini = fini;
    // No finalizer is generated for items without cleanups; a null pointer is
    // stored in that case.
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = DelayedCreation ? 1 : 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
}
6283 
6284 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6285                                               SourceLocation Loc,
6286                                               ReductionCodeGen &RCG,
6287                                               unsigned N) {
6288   auto Sizes = RCG.getSizes(N);
6289   // Emit threadprivate global variable if the type is non-constant
6290   // (Sizes.second = nullptr).
6291   if (Sizes.second) {
6292     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6293                                                      /*isSigned=*/false);
6294     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6295         CGF, CGM.getContext().getSizeType(),
6296         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6297     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6298   }
6299   // Store address of the original reduction item if custom initializer is used.
6300   if (RCG.usesReductionInitializer(N)) {
6301     Address SharedAddr = getAddrOfArtificialThreadPrivate(
6302         CGF, CGM.getContext().VoidPtrTy,
6303         generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
6304     CGF.Builder.CreateStore(
6305         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6306             RCG.getSharedLValue(N).getPointer(CGF), CGM.VoidPtrTy),
6307         SharedAddr, /*IsVolatile=*/false);
6308   }
6309 }
6310 
6311 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6312                                               SourceLocation Loc,
6313                                               llvm::Value *ReductionsPtr,
6314                                               LValue SharedLVal) {
6315   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6316   // *d);
6317   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6318                                                    CGM.IntTy,
6319                                                    /*isSigned=*/true),
6320                          ReductionsPtr,
6321                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6322                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6323   return Address(
6324       CGF.EmitRuntimeCall(
6325           createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
6326       SharedLVal.getAlignment());
6327 }
6328 
6329 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6330                                        SourceLocation Loc) {
6331   if (!CGF.HaveInsertPoint())
6332     return;
6333   // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6334   // global_tid);
6335   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6336   // Ignore return result until untied tasks are supported.
6337   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
6338   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6339     Region->emitUntiedSwitch(CGF);
6340 }
6341 
/// Emits the body of an OpenMP directive of kind \p InnerKind in the current
/// function (no outlining): an InlinedOpenMPRegionRAII object is set up with
/// \p CodeGen, \p InnerKind and \p HasCancel, and the body is emitted through
/// CGF.CapturedStmtInfo while that object is alive.
/// Does nothing if \p CGF has no insert point.
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  // NOTE(review): presumably the RAII object installs the region info that
  // CGF.CapturedStmtInfo points to below - confirm against its definition.
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
  // No statement is passed; the body comes from the region's code generator.
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}
6351 
namespace {
/// Cancellation kinds passed as the 'cncl_kind' argument of the
/// __kmpc_cancel and __kmpc_cancellationpoint runtime calls. The numeric
/// values are emitted into the generated code as i32 constants.
enum RTCancelKind {
  /// Never returned by getCancellationKind(). NOTE(review): presumably means
  /// "no cancellation requested" - confirm against the runtime library.
  CancelNoreq = 0,
  /// Selected for OMPD_parallel.
  CancelParallel = 1,
  /// Selected for OMPD_for.
  CancelLoop = 2,
  /// Selected for OMPD_sections.
  CancelSections = 3,
  /// Selected for OMPD_taskgroup.
  CancelTaskgroup = 4
};
} // anonymous namespace
6361 
6362 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6363   RTCancelKind CancelKind = CancelNoreq;
6364   if (CancelRegion == OMPD_parallel)
6365     CancelKind = CancelParallel;
6366   else if (CancelRegion == OMPD_for)
6367     CancelKind = CancelLoop;
6368   else if (CancelRegion == OMPD_sections)
6369     CancelKind = CancelSections;
6370   else {
6371     assert(CancelRegion == OMPD_taskgroup);
6372     CancelKind = CancelTaskgroup;
6373   }
6374   return CancelKind;
6375 }
6376 
6377 void CGOpenMPRuntime::emitCancellationPointCall(
6378     CodeGenFunction &CGF, SourceLocation Loc,
6379     OpenMPDirectiveKind CancelRegion) {
6380   if (!CGF.HaveInsertPoint())
6381     return;
6382   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6383   // global_tid, kmp_int32 cncl_kind);
6384   if (auto *OMPRegionInfo =
6385           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6386     // For 'cancellation point taskgroup', the task region info may not have a
6387     // cancel. This may instead happen in another adjacent task.
6388     if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6389       llvm::Value *Args[] = {
6390           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6391           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6392       // Ignore return result until untied tasks are supported.
6393       llvm::Value *Result = CGF.EmitRuntimeCall(
6394           createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
6395       // if (__kmpc_cancellationpoint()) {
6396       //   exit from construct;
6397       // }
6398       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6399       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6400       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6401       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6402       CGF.EmitBlock(ExitBB);
6403       // exit from construct;
6404       CodeGenFunction::JumpDest CancelDest =
6405           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6406       CGF.EmitBranchThroughCleanup(CancelDest);
6407       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6408     }
6409   }
6410 }
6411 
6412 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6413                                      const Expr *IfCond,
6414                                      OpenMPDirectiveKind CancelRegion) {
6415   if (!CGF.HaveInsertPoint())
6416     return;
6417   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6418   // kmp_int32 cncl_kind);
6419   if (auto *OMPRegionInfo =
6420           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6421     auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
6422                                                         PrePostActionTy &) {
6423       CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6424       llvm::Value *Args[] = {
6425           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6426           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6427       // Ignore return result until untied tasks are supported.
6428       llvm::Value *Result = CGF.EmitRuntimeCall(
6429           RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
6430       // if (__kmpc_cancel()) {
6431       //   exit from construct;
6432       // }
6433       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6434       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6435       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6436       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6437       CGF.EmitBlock(ExitBB);
6438       // exit from construct;
6439       CodeGenFunction::JumpDest CancelDest =
6440           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6441       CGF.EmitBranchThroughCleanup(CancelDest);
6442       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6443     };
6444     if (IfCond) {
6445       emitIfClause(CGF, IfCond, ThenGen,
6446                    [](CodeGenFunction &, PrePostActionTy &) {});
6447     } else {
6448       RegionCodeGenTy ThenRCG(ThenGen);
6449       ThenRCG(CGF);
6450     }
6451   }
6452 }
6453 
/// Emits the outlined function for the target directive \p D by forwarding all
/// arguments to emitTargetOutlinedFunctionHelper(), after recording in
/// HasEmittedTargetRegion that a target region has been emitted.
/// \p ParentName must not be empty (checked by an assertion).
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  // Set before the helper runs, regardless of IsOffloadEntry.
  HasEmittedTargetRegion = true;
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}
6463 
6464 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6465     const OMPExecutableDirective &D, StringRef ParentName,
6466     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6467     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6468   // Create a unique name for the entry function using the source location
6469   // information of the current target region. The name will be something like:
6470   //
6471   // __omp_offloading_DD_FFFF_PP_lBB
6472   //
6473   // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
6474   // mangled name of the function that encloses the target region and BB is the
6475   // line number of the target region.
6476 
6477   unsigned DeviceID;
6478   unsigned FileID;
6479   unsigned Line;
6480   getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
6481                            Line);
6482   SmallString<64> EntryFnName;
6483   {
6484     llvm::raw_svector_ostream OS(EntryFnName);
6485     OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
6486        << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
6487   }
6488 
6489   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6490 
6491   CodeGenFunction CGF(CGM, true);
6492   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6493   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6494 
6495   OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
6496 
6497   // If this target outline function is not an offload entry, we don't need to
6498   // register it.
6499   if (!IsOffloadEntry)
6500     return;
6501 
6502   // The target region ID is used by the runtime library to identify the current
6503   // target region, so it only has to be unique and not necessarily point to
6504   // anything. It could be the pointer to the outlined function that implements
6505   // the target region, but we aren't using that so that the compiler doesn't
6506   // need to keep that, and could therefore inline the host function if proven
6507   // worthwhile during optimization. In the other hand, if emitting code for the
6508   // device, the ID has to be the function address so that it can retrieved from
6509   // the offloading entry and launched by the runtime library. We also mark the
6510   // outlined function to have external linkage in case we are emitting code for
6511   // the device, because these functions will be entry points to the device.
6512 
6513   if (CGM.getLangOpts().OpenMPIsDevice) {
6514     OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
6515     OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
6516     OutlinedFn->setDSOLocal(false);
6517   } else {
6518     std::string Name = getName({EntryFnName, "region_id"});
6519     OutlinedFnID = new llvm::GlobalVariable(
6520         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
6521         llvm::GlobalValue::WeakAnyLinkage,
6522         llvm::Constant::getNullValue(CGM.Int8Ty), Name);
6523   }
6524 
6525   // Register the information for the entry associated with this target region.
6526   OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
6527       DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
6528       OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
6529 }
6530 
6531 /// Checks if the expression is constant or does not have non-trivial function
6532 /// calls.
6533 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6534   // We can skip constant expressions.
6535   // We can skip expressions with trivial calls or simple expressions.
6536   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6537           !E->hasNonTrivialCall(Ctx)) &&
6538          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6539 }
6540 
6541 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6542                                                     const Stmt *Body) {
6543   const Stmt *Child = Body->IgnoreContainers();
6544   while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6545     Child = nullptr;
6546     for (const Stmt *S : C->body()) {
6547       if (const auto *E = dyn_cast<Expr>(S)) {
6548         if (isTrivial(Ctx, E))
6549           continue;
6550       }
6551       // Some of the statements can be ignored.
6552       if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6553           isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6554         continue;
6555       // Analyze declarations.
6556       if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6557         if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
6558               if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6559                   isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6560                   isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6561                   isa<UsingDirectiveDecl>(D) ||
6562                   isa<OMPDeclareReductionDecl>(D) ||
6563                   isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6564                 return true;
6565               const auto *VD = dyn_cast<VarDecl>(D);
6566               if (!VD)
6567                 return false;
6568               return VD->isConstexpr() ||
6569                      ((VD->getType().isTrivialType(Ctx) ||
6570                        VD->getType()->isReferenceType()) &&
6571                       (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
6572             }))
6573           continue;
6574       }
6575       // Found multiple children - cannot get the one child only.
6576       if (Child)
6577         return nullptr;
6578       Child = S;
6579     }
6580     if (Child)
6581       Child = Child->IgnoreContainers();
6582   }
6583   return Child;
6584 }
6585 
6586 /// Emit the number of teams for a target directive.  Inspect the num_teams
6587 /// clause associated with a teams construct combined or closely nested
6588 /// with the target directive.
6589 ///
6590 /// Emit a team of size one for directives such as 'target parallel' that
6591 /// have no associated teams construct.
6592 ///
6593 /// Otherwise, return nullptr.
6594 static llvm::Value *
6595 emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
6596                                const OMPExecutableDirective &D) {
6597   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6598          "Clauses associated with the teams directive expected to be emitted "
6599          "only for the host!");
6600   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6601   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6602          "Expected target-based executable directive.");
6603   CGBuilderTy &Bld = CGF.Builder;
6604   switch (DirectiveKind) {
6605   case OMPD_target: {
6606     const auto *CS = D.getInnermostCapturedStmt();
6607     const auto *Body =
6608         CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6609     const Stmt *ChildStmt =
6610         CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6611     if (const auto *NestedDir =
6612             dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6613       if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6614         if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6615           CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6616           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6617           const Expr *NumTeams =
6618               NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6619           llvm::Value *NumTeamsVal =
6620               CGF.EmitScalarExpr(NumTeams,
6621                                  /*IgnoreResultAssign*/ true);
6622           return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6623                                    /*isSigned=*/true);
6624         }
6625         return Bld.getInt32(0);
6626       }
6627       if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6628           isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
6629         return Bld.getInt32(1);
6630       return Bld.getInt32(0);
6631     }
6632     return nullptr;
6633   }
6634   case OMPD_target_teams:
6635   case OMPD_target_teams_distribute:
6636   case OMPD_target_teams_distribute_simd:
6637   case OMPD_target_teams_distribute_parallel_for:
6638   case OMPD_target_teams_distribute_parallel_for_simd: {
6639     if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6640       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6641       const Expr *NumTeams =
6642           D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6643       llvm::Value *NumTeamsVal =
6644           CGF.EmitScalarExpr(NumTeams,
6645                              /*IgnoreResultAssign*/ true);
6646       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6647                                /*isSigned=*/true);
6648     }
6649     return Bld.getInt32(0);
6650   }
6651   case OMPD_target_parallel:
6652   case OMPD_target_parallel_for:
6653   case OMPD_target_parallel_for_simd:
6654   case OMPD_target_simd:
6655     return Bld.getInt32(1);
6656   case OMPD_parallel:
6657   case OMPD_for:
6658   case OMPD_parallel_for:
6659   case OMPD_parallel_master:
6660   case OMPD_parallel_sections:
6661   case OMPD_for_simd:
6662   case OMPD_parallel_for_simd:
6663   case OMPD_cancel:
6664   case OMPD_cancellation_point:
6665   case OMPD_ordered:
6666   case OMPD_threadprivate:
6667   case OMPD_allocate:
6668   case OMPD_task:
6669   case OMPD_simd:
6670   case OMPD_sections:
6671   case OMPD_section:
6672   case OMPD_single:
6673   case OMPD_master:
6674   case OMPD_critical:
6675   case OMPD_taskyield:
6676   case OMPD_barrier:
6677   case OMPD_taskwait:
6678   case OMPD_taskgroup:
6679   case OMPD_atomic:
6680   case OMPD_flush:
6681   case OMPD_teams:
6682   case OMPD_target_data:
6683   case OMPD_target_exit_data:
6684   case OMPD_target_enter_data:
6685   case OMPD_distribute:
6686   case OMPD_distribute_simd:
6687   case OMPD_distribute_parallel_for:
6688   case OMPD_distribute_parallel_for_simd:
6689   case OMPD_teams_distribute:
6690   case OMPD_teams_distribute_simd:
6691   case OMPD_teams_distribute_parallel_for:
6692   case OMPD_teams_distribute_parallel_for_simd:
6693   case OMPD_target_update:
6694   case OMPD_declare_simd:
6695   case OMPD_declare_variant:
6696   case OMPD_declare_target:
6697   case OMPD_end_declare_target:
6698   case OMPD_declare_reduction:
6699   case OMPD_declare_mapper:
6700   case OMPD_taskloop:
6701   case OMPD_taskloop_simd:
6702   case OMPD_master_taskloop:
6703   case OMPD_master_taskloop_simd:
6704   case OMPD_parallel_master_taskloop:
6705   case OMPD_parallel_master_taskloop_simd:
6706   case OMPD_requires:
6707   case OMPD_unknown:
6708     break;
6709   }
6710   llvm_unreachable("Unexpected directive kind.");
6711 }
6712 
6713 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6714                                   llvm::Value *DefaultThreadLimitVal) {
6715   const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6716       CGF.getContext(), CS->getCapturedStmt());
6717   if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6718     if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6719       llvm::Value *NumThreads = nullptr;
6720       llvm::Value *CondVal = nullptr;
6721       // Handle if clause. If if clause present, the number of threads is
6722       // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6723       if (Dir->hasClausesOfKind<OMPIfClause>()) {
6724         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6725         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6726         const OMPIfClause *IfClause = nullptr;
6727         for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6728           if (C->getNameModifier() == OMPD_unknown ||
6729               C->getNameModifier() == OMPD_parallel) {
6730             IfClause = C;
6731             break;
6732           }
6733         }
6734         if (IfClause) {
6735           const Expr *Cond = IfClause->getCondition();
6736           bool Result;
6737           if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6738             if (!Result)
6739               return CGF.Builder.getInt32(1);
6740           } else {
6741             CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6742             if (const auto *PreInit =
6743                     cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6744               for (const auto *I : PreInit->decls()) {
6745                 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6746                   CGF.EmitVarDecl(cast<VarDecl>(*I));
6747                 } else {
6748                   CodeGenFunction::AutoVarEmission Emission =
6749                       CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6750                   CGF.EmitAutoVarCleanups(Emission);
6751                 }
6752               }
6753             }
6754             CondVal = CGF.EvaluateExprAsBool(Cond);
6755           }
6756         }
6757       }
6758       // Check the value of num_threads clause iff if clause was not specified
6759       // or is not evaluated to false.
6760       if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6761         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6762         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6763         const auto *NumThreadsClause =
6764             Dir->getSingleClause<OMPNumThreadsClause>();
6765         CodeGenFunction::LexicalScope Scope(
6766             CGF, NumThreadsClause->getNumThreads()->getSourceRange());
6767         if (const auto *PreInit =
6768                 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6769           for (const auto *I : PreInit->decls()) {
6770             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6771               CGF.EmitVarDecl(cast<VarDecl>(*I));
6772             } else {
6773               CodeGenFunction::AutoVarEmission Emission =
6774                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6775               CGF.EmitAutoVarCleanups(Emission);
6776             }
6777           }
6778         }
6779         NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
6780         NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
6781                                                /*isSigned=*/false);
6782         if (DefaultThreadLimitVal)
6783           NumThreads = CGF.Builder.CreateSelect(
6784               CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
6785               DefaultThreadLimitVal, NumThreads);
6786       } else {
6787         NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
6788                                            : CGF.Builder.getInt32(0);
6789       }
6790       // Process condition of the if clause.
6791       if (CondVal) {
6792         NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
6793                                               CGF.Builder.getInt32(1));
6794       }
6795       return NumThreads;
6796     }
6797     if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6798       return CGF.Builder.getInt32(1);
6799     return DefaultThreadLimitVal;
6800   }
6801   return DefaultThreadLimitVal ? DefaultThreadLimitVal
6802                                : CGF.Builder.getInt32(0);
6803 }
6804 
6805 /// Emit the number of threads for a target directive.  Inspect the
6806 /// thread_limit clause associated with a teams construct combined or closely
6807 /// nested with the target directive.
6808 ///
6809 /// Emit the num_threads clause for directives such as 'target parallel' that
6810 /// have no associated teams construct.
6811 ///
6812 /// Otherwise, return nullptr.
6813 static llvm::Value *
6814 emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
6815                                  const OMPExecutableDirective &D) {
6816   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6817          "Clauses associated with the teams directive expected to be emitted "
6818          "only for the host!");
6819   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6820   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6821          "Expected target-based executable directive.");
6822   CGBuilderTy &Bld = CGF.Builder;
6823   llvm::Value *ThreadLimitVal = nullptr;
6824   llvm::Value *NumThreadsVal = nullptr;
6825   switch (DirectiveKind) {
6826   case OMPD_target: {
6827     const CapturedStmt *CS = D.getInnermostCapturedStmt();
6828     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6829       return NumThreads;
6830     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6831         CGF.getContext(), CS->getCapturedStmt());
6832     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6833       if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
6834         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6835         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6836         const auto *ThreadLimitClause =
6837             Dir->getSingleClause<OMPThreadLimitClause>();
6838         CodeGenFunction::LexicalScope Scope(
6839             CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
6840         if (const auto *PreInit =
6841                 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6842           for (const auto *I : PreInit->decls()) {
6843             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6844               CGF.EmitVarDecl(cast<VarDecl>(*I));
6845             } else {
6846               CodeGenFunction::AutoVarEmission Emission =
6847                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6848               CGF.EmitAutoVarCleanups(Emission);
6849             }
6850           }
6851         }
6852         llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6853             ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6854         ThreadLimitVal =
6855             Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6856       }
6857       if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6858           !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6859         CS = Dir->getInnermostCapturedStmt();
6860         const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6861             CGF.getContext(), CS->getCapturedStmt());
6862         Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6863       }
6864       if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
6865           !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
6866         CS = Dir->getInnermostCapturedStmt();
6867         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6868           return NumThreads;
6869       }
6870       if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6871         return Bld.getInt32(1);
6872     }
6873     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6874   }
6875   case OMPD_target_teams: {
6876     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6877       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6878       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6879       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6880           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6881       ThreadLimitVal =
6882           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6883     }
6884     const CapturedStmt *CS = D.getInnermostCapturedStmt();
6885     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6886       return NumThreads;
6887     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6888         CGF.getContext(), CS->getCapturedStmt());
6889     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6890       if (Dir->getDirectiveKind() == OMPD_distribute) {
6891         CS = Dir->getInnermostCapturedStmt();
6892         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6893           return NumThreads;
6894       }
6895     }
6896     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6897   }
6898   case OMPD_target_teams_distribute:
6899     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6900       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6901       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6902       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6903           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6904       ThreadLimitVal =
6905           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6906     }
6907     return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
6908   case OMPD_target_parallel:
6909   case OMPD_target_parallel_for:
6910   case OMPD_target_parallel_for_simd:
6911   case OMPD_target_teams_distribute_parallel_for:
6912   case OMPD_target_teams_distribute_parallel_for_simd: {
6913     llvm::Value *CondVal = nullptr;
6914     // Handle if clause. If if clause present, the number of threads is
6915     // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6916     if (D.hasClausesOfKind<OMPIfClause>()) {
6917       const OMPIfClause *IfClause = nullptr;
6918       for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6919         if (C->getNameModifier() == OMPD_unknown ||
6920             C->getNameModifier() == OMPD_parallel) {
6921           IfClause = C;
6922           break;
6923         }
6924       }
6925       if (IfClause) {
6926         const Expr *Cond = IfClause->getCondition();
6927         bool Result;
6928         if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6929           if (!Result)
6930             return Bld.getInt32(1);
6931         } else {
6932           CodeGenFunction::RunCleanupsScope Scope(CGF);
6933           CondVal = CGF.EvaluateExprAsBool(Cond);
6934         }
6935       }
6936     }
6937     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6938       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6939       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6940       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6941           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6942       ThreadLimitVal =
6943           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6944     }
6945     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6946       CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6947       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6948       llvm::Value *NumThreads = CGF.EmitScalarExpr(
6949           NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
6950       NumThreadsVal =
6951           Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
6952       ThreadLimitVal = ThreadLimitVal
6953                            ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
6954                                                                 ThreadLimitVal),
6955                                               NumThreadsVal, ThreadLimitVal)
6956                            : NumThreadsVal;
6957     }
6958     if (!ThreadLimitVal)
6959       ThreadLimitVal = Bld.getInt32(0);
6960     if (CondVal)
6961       return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
6962     return ThreadLimitVal;
6963   }
6964   case OMPD_target_teams_distribute_simd:
6965   case OMPD_target_simd:
6966     return Bld.getInt32(1);
6967   case OMPD_parallel:
6968   case OMPD_for:
6969   case OMPD_parallel_for:
6970   case OMPD_parallel_master:
6971   case OMPD_parallel_sections:
6972   case OMPD_for_simd:
6973   case OMPD_parallel_for_simd:
6974   case OMPD_cancel:
6975   case OMPD_cancellation_point:
6976   case OMPD_ordered:
6977   case OMPD_threadprivate:
6978   case OMPD_allocate:
6979   case OMPD_task:
6980   case OMPD_simd:
6981   case OMPD_sections:
6982   case OMPD_section:
6983   case OMPD_single:
6984   case OMPD_master:
6985   case OMPD_critical:
6986   case OMPD_taskyield:
6987   case OMPD_barrier:
6988   case OMPD_taskwait:
6989   case OMPD_taskgroup:
6990   case OMPD_atomic:
6991   case OMPD_flush:
6992   case OMPD_teams:
6993   case OMPD_target_data:
6994   case OMPD_target_exit_data:
6995   case OMPD_target_enter_data:
6996   case OMPD_distribute:
6997   case OMPD_distribute_simd:
6998   case OMPD_distribute_parallel_for:
6999   case OMPD_distribute_parallel_for_simd:
7000   case OMPD_teams_distribute:
7001   case OMPD_teams_distribute_simd:
7002   case OMPD_teams_distribute_parallel_for:
7003   case OMPD_teams_distribute_parallel_for_simd:
7004   case OMPD_target_update:
7005   case OMPD_declare_simd:
7006   case OMPD_declare_variant:
7007   case OMPD_declare_target:
7008   case OMPD_end_declare_target:
7009   case OMPD_declare_reduction:
7010   case OMPD_declare_mapper:
7011   case OMPD_taskloop:
7012   case OMPD_taskloop_simd:
7013   case OMPD_master_taskloop:
7014   case OMPD_master_taskloop_simd:
7015   case OMPD_parallel_master_taskloop:
7016   case OMPD_parallel_master_taskloop_simd:
7017   case OMPD_requires:
7018   case OMPD_unknown:
7019     break;
7020   }
7021   llvm_unreachable("Unsupported directive kind.");
7022 }
7023 
7024 namespace {
7025 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7026 
7027 // Utility to handle information from clauses associated with a given
7028 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7029 // It provides a convenient interface to obtain the information and generate
7030 // code for that information.
7031 class MappableExprsHandler {
7032 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  ///
  /// Every enumerator from OMP_MAP_TO through OMP_MAP_CLOSE occupies a
  /// distinct single bit, so they can be combined with bitwise OR.
  /// OMP_MAP_MEMBER_OF is a 16-bit field mask rather than a single flag.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    // Enables the bitwise operators on this enum; OMP_MAP_MEMBER_OF is named
    // as the largest flag.
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7073 
7074   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7075   static unsigned getFlagMemberOffset() {
7076     unsigned Offset = 0;
7077     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7078          Remain = Remain >> 1)
7079       Offset++;
7080     return Offset;
7081   }
7082 
7083   /// Class that associates information with a base pointer to be passed to the
7084   /// runtime library.
7085   class BasePointerInfo {
7086     /// The base pointer.
7087     llvm::Value *Ptr = nullptr;
7088     /// The base declaration that refers to this device pointer, or null if
7089     /// there is none.
7090     const ValueDecl *DevPtrDecl = nullptr;
7091 
7092   public:
7093     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7094         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7095     llvm::Value *operator*() const { return Ptr; }
7096     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7097     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7098   };
7099 
  /// Sequence of base-pointer records (see BasePointerInfo).
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  /// Sequence of LLVM values; used below for both section pointers and sizes.
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  /// Sequence of mapping-flag sets (see OpenMPOffloadMappingFlags).
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
7103 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  ///
  /// A default-constructed object holds field index 0 with an invalid address
  /// in both element slots, and an invalid base address.
  struct StructRangeInfoTy {
    /// Lowest mapped element: its field index and its address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Highest mapped element: its field index and its address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Base address associated with the range; presumably the address of the
    /// enclosing struct -- confirm at the sites that fill it in.
    Address Base = Address::invalid();
  };
7115 
7116 private:
  /// Bundle of the information describing one mapped component list: the
  /// components themselves, the map type and modifiers, whether a device
  /// pointer has to be returned for it, and whether the map is implicit.
  struct MapInfo {
    /// Expression components making up the mapped entity.
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    /// Map type of the entry; OMPC_MAP_unknown until set.
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    /// Map-type modifiers of the entry. ArrayRef is a non-owning view, so the
    /// referenced storage has to outlive this object.
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    /// Whether a device pointer has to be returned for this entry.
    bool ReturnDevicePointer = false;
    /// Whether the map is implicit.
    bool IsImplicit = false;

    MapInfo() = default;
    /// Member-wise constructor; every argument is stored unchanged in the
    /// field of the same name.
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
  };
7134 
  /// If use_device_ptr is used on a pointer which is a struct member and there
  /// is no map information about it, then emission of that entry is deferred
  /// until the whole struct has been processed.
  ///
  /// This record stores what is needed to emit such an entry later.
  struct DeferredDevicePtrEntryTy {
    /// Expression of the deferred entry (presumably the struct-member
    /// expression named in the clause -- confirm at the use sites).
    const Expr *IE = nullptr;
    /// Declaration associated with the deferred entry.
    const ValueDecl *VD = nullptr;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
        : IE(IE), VD(VD) {}
  };
7145 
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  /// Each declaration maps to a list of component lists.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;
7166 
7167   llvm::Value *getExprTypeSize(const Expr *E) const {
7168     QualType ExprTy = E->getType().getCanonicalType();
7169 
7170     // Reference types are ignored for mapping purposes.
7171     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7172       ExprTy = RefTy->getPointeeType().getCanonicalType();
7173 
7174     // Given that an array section is considered a built-in type, we need to
7175     // do the calculation based on the length of the section instead of relying
7176     // on CGF.getTypeSize(E->getType()).
7177     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7178       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7179                             OAE->getBase()->IgnoreParenImpCasts())
7180                             .getCanonicalType();
7181 
7182       // If there is no length associated with the expression and lower bound is
7183       // not specified too, that means we are using the whole length of the
7184       // base.
7185       if (!OAE->getLength() && OAE->getColonLoc().isValid() &&
7186           !OAE->getLowerBound())
7187         return CGF.getTypeSize(BaseTy);
7188 
7189       llvm::Value *ElemSize;
7190       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7191         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7192       } else {
7193         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7194         assert(ATy && "Expecting array type if not a pointer type.");
7195         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7196       }
7197 
7198       // If we don't have a length at this point, that is because we have an
7199       // array section with a single element.
7200       if (!OAE->getLength() && OAE->getColonLoc().isInvalid())
7201         return ElemSize;
7202 
7203       if (const Expr *LenExpr = OAE->getLength()) {
7204         llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7205         LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7206                                              CGF.getContext().getSizeType(),
7207                                              LenExpr->getExprLoc());
7208         return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7209       }
7210       assert(!OAE->getLength() && OAE->getColonLoc().isValid() &&
7211              OAE->getLowerBound() && "expected array_section[lb:].");
7212       // Size = sizetype - lb * elemtype;
7213       llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7214       llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7215       LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7216                                        CGF.getContext().getSizeType(),
7217                                        OAE->getLowerBound()->getExprLoc());
7218       LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7219       llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7220       llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7221       LengthVal = CGF.Builder.CreateSelect(
7222           Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7223       return LengthVal;
7224     }
7225     return CGF.getTypeSize(ExprTy);
7226   }
7227 
7228   /// Return the corresponding bits for a given map clause modifier. Add
7229   /// a flag marking the map as a pointer if requested. Add a flag marking the
7230   /// map as the first one of a series of maps that relate to the same map
7231   /// expression.
7232   OpenMPOffloadMappingFlags getMapTypeBits(
7233       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7234       bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
7235     OpenMPOffloadMappingFlags Bits =
7236         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7237     switch (MapType) {
7238     case OMPC_MAP_alloc:
7239     case OMPC_MAP_release:
7240       // alloc and release is the default behavior in the runtime library,  i.e.
7241       // if we don't pass any bits alloc/release that is what the runtime is
7242       // going to do. Therefore, we don't need to signal anything for these two
7243       // type modifiers.
7244       break;
7245     case OMPC_MAP_to:
7246       Bits |= OMP_MAP_TO;
7247       break;
7248     case OMPC_MAP_from:
7249       Bits |= OMP_MAP_FROM;
7250       break;
7251     case OMPC_MAP_tofrom:
7252       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7253       break;
7254     case OMPC_MAP_delete:
7255       Bits |= OMP_MAP_DELETE;
7256       break;
7257     case OMPC_MAP_unknown:
7258       llvm_unreachable("Unexpected map type!");
7259     }
7260     if (AddPtrFlag)
7261       Bits |= OMP_MAP_PTR_AND_OBJ;
7262     if (AddIsTargetParamFlag)
7263       Bits |= OMP_MAP_TARGET_PARAM;
7264     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7265         != MapModifiers.end())
7266       Bits |= OMP_MAP_ALWAYS;
7267     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7268         != MapModifiers.end())
7269       Bits |= OMP_MAP_CLOSE;
7270     return Bits;
7271   }
7272 
7273   /// Return true if the provided expression is a final array section. A
7274   /// final array section, is one whose length can't be proved to be one.
7275   bool isFinalArraySectionExpression(const Expr *E) const {
7276     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7277 
7278     // It is not an array section and therefore not a unity-size one.
7279     if (!OASE)
7280       return false;
7281 
7282     // An array section with no colon always refer to a single element.
7283     if (OASE->getColonLoc().isInvalid())
7284       return false;
7285 
7286     const Expr *Length = OASE->getLength();
7287 
7288     // If we don't have a length we have to check if the array has size 1
7289     // for this dimension. Also, we should always expect a length if the
7290     // base type is pointer.
7291     if (!Length) {
7292       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7293                              OASE->getBase()->IgnoreParenImpCasts())
7294                              .getCanonicalType();
7295       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7296         return ATy->getSize().getSExtValue() != 1;
7297       // If we don't have a constant dimension length, we have to consider
7298       // the current section as having any size, so it is not necessarily
7299       // unitary. If it happen to be unity size, that's user fault.
7300       return true;
7301     }
7302 
7303     // Check if the length evaluates to 1.
7304     Expr::EvalResult Result;
7305     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7306       return true; // Can have more that size 1.
7307 
7308     llvm::APSInt ConstLength = Result.Val.getInt();
7309     return ConstLength.getSExtValue() != 1;
7310   }
7311 
  /// Generate the base pointers, section pointers, sizes and map type
  /// bits for the provided map type, map modifier, and expression components.
  /// \a IsFirstComponentList should be set to true if the provided set of
  /// components is the first associated with a capture.
7316   void generateInfoForComponentList(
7317       OpenMPMapClauseKind MapType,
7318       ArrayRef<OpenMPMapModifierKind> MapModifiers,
7319       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7320       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
7321       MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
7322       StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
7323       bool IsImplicit,
7324       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7325           OverlappedElements = llvm::None) const {
7326     // The following summarizes what has to be generated for each map and the
7327     // types below. The generated information is expressed in this order:
7328     // base pointer, section pointer, size, flags
7329     // (to add to the ones that come from the map type and modifier).
7330     //
7331     // double d;
7332     // int i[100];
7333     // float *p;
7334     //
7335     // struct S1 {
7336     //   int i;
7337     //   float f[50];
7338     // }
7339     // struct S2 {
7340     //   int i;
7341     //   float f[50];
7342     //   S1 s;
7343     //   double *p;
7344     //   struct S2 *ps;
7345     // }
7346     // S2 s;
7347     // S2 *ps;
7348     //
7349     // map(d)
7350     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7351     //
7352     // map(i)
7353     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7354     //
7355     // map(i[1:23])
7356     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7357     //
7358     // map(p)
7359     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7360     //
7361     // map(p[1:24])
7362     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7363     //
7364     // map(s)
7365     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7366     //
7367     // map(s.i)
7368     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7369     //
7370     // map(s.s.f)
7371     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7372     //
7373     // map(s.p)
7374     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7375     //
7376     // map(to: s.p[:22])
7377     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7378     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7379     // &(s.p), &(s.p[0]), 22*sizeof(double),
7380     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7381     // (*) alloc space for struct members, only this is a target parameter
7382     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7383     //      optimizes this entry out, same in the examples below)
7384     // (***) map the pointee (map: to)
7385     //
7386     // map(s.ps)
7387     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7388     //
7389     // map(from: s.ps->s.i)
7390     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7391     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7392     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7393     //
7394     // map(to: s.ps->ps)
7395     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7396     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7397     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7398     //
7399     // map(s.ps->ps->ps)
7400     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7401     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7402     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7403     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7404     //
7405     // map(to: s.ps->ps->s.f[:22])
7406     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7407     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7408     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7409     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7410     //
7411     // map(ps)
7412     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7413     //
7414     // map(ps->i)
7415     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7416     //
7417     // map(ps->s.f)
7418     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7419     //
7420     // map(from: ps->p)
7421     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7422     //
7423     // map(to: ps->p[:22])
7424     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7425     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7426     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7427     //
7428     // map(ps->ps)
7429     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7430     //
7431     // map(from: ps->ps->s.i)
7432     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7433     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7434     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7435     //
7436     // map(from: ps->ps->ps)
7437     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7438     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7439     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7440     //
7441     // map(ps->ps->ps->ps)
7442     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7443     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7444     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7445     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7446     //
7447     // map(to: ps->ps->ps->s.f[:22])
7448     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7449     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7450     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7451     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7452     //
7453     // map(to: s.f[:22]) map(from: s.p[:33])
7454     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7455     //     sizeof(double*) (**), TARGET_PARAM
7456     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7457     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7458     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7459     // (*) allocate contiguous space needed to fit all mapped members even if
7460     //     we allocate space for members not mapped (in this example,
7461     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7462     //     them as well because they fall between &s.f[0] and &s.p)
7463     //
7464     // map(from: s.f[:22]) map(to: ps->p[:33])
7465     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7466     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7467     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7468     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7469     // (*) the struct this entry pertains to is the 2nd element in the list of
7470     //     arguments, hence MEMBER_OF(2)
7471     //
7472     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7473     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7474     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7475     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7476     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7477     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7478     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7479     // (*) the struct this entry pertains to is the 4th element in the list
7480     //     of arguments, hence MEMBER_OF(4)
7481 
7482     // Track if the map information being generated is the first for a capture.
7483     bool IsCaptureFirstInfo = IsFirstComponentList;
7484     // When the variable is on a declare target link or in a to clause with
7485     // unified memory, a reference is needed to hold the host/device address
7486     // of the variable.
7487     bool RequiresReference = false;
7488 
7489     // Scan the components from the base to the complete expression.
7490     auto CI = Components.rbegin();
7491     auto CE = Components.rend();
7492     auto I = CI;
7493 
7494     // Track if the map information being generated is the first for a list of
7495     // components.
7496     bool IsExpressionFirstInfo = true;
7497     Address BP = Address::invalid();
7498     const Expr *AssocExpr = I->getAssociatedExpression();
7499     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7500     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7501 
7502     if (isa<MemberExpr>(AssocExpr)) {
7503       // The base is the 'this' pointer. The content of the pointer is going
7504       // to be the base of the field being mapped.
7505       BP = CGF.LoadCXXThisAddress();
7506     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7507                (OASE &&
7508                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7509       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7510     } else {
7511       // The base is the reference to the variable.
7512       // BP = &Var.
7513       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7514       if (const auto *VD =
7515               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7516         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7517                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7518           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7519               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7520                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7521             RequiresReference = true;
7522             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7523           }
7524         }
7525       }
7526 
7527       // If the variable is a pointer and is being dereferenced (i.e. is not
7528       // the last component), the base has to be the pointer itself, not its
7529       // reference. References are ignored for mapping purposes.
7530       QualType Ty =
7531           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7532       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7533         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7534 
7535         // We do not need to generate individual map information for the
7536         // pointer, it can be associated with the combined storage.
7537         ++I;
7538       }
7539     }
7540 
7541     // Track whether a component of the list should be marked as MEMBER_OF some
7542     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7543     // in a component list should be marked as MEMBER_OF, all subsequent entries
7544     // do not belong to the base struct. E.g.
7545     // struct S2 s;
7546     // s.ps->ps->ps->f[:]
7547     //   (1) (2) (3) (4)
7548     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7549     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7550     // is the pointee of ps(2) which is not member of struct s, so it should not
7551     // be marked as such (it is still PTR_AND_OBJ).
7552     // The variable is initialized to false so that PTR_AND_OBJ entries which
7553     // are not struct members are not considered (e.g. array of pointers to
7554     // data).
7555     bool ShouldBeMemberOf = false;
7556 
7557     // Variable keeping track of whether or not we have encountered a component
7558     // in the component list which is a member expression. Useful when we have a
7559     // pointer or a final array section, in which case it is the previous
7560     // component in the list which tells us whether we have a member expression.
7561     // E.g. X.f[:]
7562     // While processing the final array section "[:]" it is "f" which tells us
7563     // whether we are dealing with a member of a declared struct.
7564     const MemberExpr *EncounteredME = nullptr;
7565 
7566     for (; I != CE; ++I) {
7567       // If the current component is member of a struct (parent struct) mark it.
7568       if (!EncounteredME) {
7569         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7570         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7571         // as MEMBER_OF the parent struct.
7572         if (EncounteredME)
7573           ShouldBeMemberOf = true;
7574       }
7575 
7576       auto Next = std::next(I);
7577 
7578       // We need to generate the addresses and sizes if this is the last
7579       // component, if the component is a pointer or if it is an array section
7580       // whose length can't be proved to be one. If this is a pointer, it
7581       // becomes the base address for the following components.
7582 
7583       // A final array section, is one whose length can't be proved to be one.
7584       bool IsFinalArraySection =
7585           isFinalArraySectionExpression(I->getAssociatedExpression());
7586 
7587       // Get information on whether the element is a pointer. Have to do a
7588       // special treatment for array sections given that they are built-in
7589       // types.
7590       const auto *OASE =
7591           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7592       bool IsPointer =
7593           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7594                        .getCanonicalType()
7595                        ->isAnyPointerType()) ||
7596           I->getAssociatedExpression()->getType()->isAnyPointerType();
7597 
7598       if (Next == CE || IsPointer || IsFinalArraySection) {
7599         // If this is not the last component, we expect the pointer to be
7600         // associated with an array expression or member expression.
7601         assert((Next == CE ||
7602                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7603                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7604                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
7605                "Unexpected expression");
7606 
7607         Address LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7608                          .getAddress(CGF);
7609 
7610         // If this component is a pointer inside the base struct then we don't
7611         // need to create any entry for it - it will be combined with the object
7612         // it is pointing to into a single PTR_AND_OBJ entry.
7613         bool IsMemberPointer =
7614             IsPointer && EncounteredME &&
7615             (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
7616              EncounteredME);
7617         if (!OverlappedElements.empty()) {
7618           // Handle base element with the info for overlapped elements.
7619           assert(!PartialStruct.Base.isValid() && "The base element is set.");
7620           assert(Next == CE &&
7621                  "Expected last element for the overlapped elements.");
7622           assert(!IsPointer &&
7623                  "Unexpected base element with the pointer type.");
7624           // Mark the whole struct as the struct that requires allocation on the
7625           // device.
7626           PartialStruct.LowestElem = {0, LB};
7627           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7628               I->getAssociatedExpression()->getType());
7629           Address HB = CGF.Builder.CreateConstGEP(
7630               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
7631                                                               CGF.VoidPtrTy),
7632               TypeSize.getQuantity() - 1);
7633           PartialStruct.HighestElem = {
7634               std::numeric_limits<decltype(
7635                   PartialStruct.HighestElem.first)>::max(),
7636               HB};
7637           PartialStruct.Base = BP;
7638           // Emit data for non-overlapped data.
7639           OpenMPOffloadMappingFlags Flags =
7640               OMP_MAP_MEMBER_OF |
7641               getMapTypeBits(MapType, MapModifiers, IsImplicit,
7642                              /*AddPtrFlag=*/false,
7643                              /*AddIsTargetParamFlag=*/false);
7644           LB = BP;
7645           llvm::Value *Size = nullptr;
7646           // Do bitcopy of all non-overlapped structure elements.
7647           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7648                    Component : OverlappedElements) {
7649             Address ComponentLB = Address::invalid();
7650             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7651                  Component) {
7652               if (MC.getAssociatedDeclaration()) {
7653                 ComponentLB =
7654                     CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7655                         .getAddress(CGF);
7656                 Size = CGF.Builder.CreatePtrDiff(
7657                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
7658                     CGF.EmitCastToVoidPtr(LB.getPointer()));
7659                 break;
7660               }
7661             }
7662             BasePointers.push_back(BP.getPointer());
7663             Pointers.push_back(LB.getPointer());
7664             Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty,
7665                                                       /*isSigned=*/true));
7666             Types.push_back(Flags);
7667             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7668           }
7669           BasePointers.push_back(BP.getPointer());
7670           Pointers.push_back(LB.getPointer());
7671           Size = CGF.Builder.CreatePtrDiff(
7672               CGF.EmitCastToVoidPtr(
7673                   CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
7674               CGF.EmitCastToVoidPtr(LB.getPointer()));
7675           Sizes.push_back(
7676               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7677           Types.push_back(Flags);
7678           break;
7679         }
7680         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7681         if (!IsMemberPointer) {
7682           BasePointers.push_back(BP.getPointer());
7683           Pointers.push_back(LB.getPointer());
7684           Sizes.push_back(
7685               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7686 
7687           // We need to add a pointer flag for each map that comes from the
7688           // same expression except for the first one. We also need to signal
7689           // this map is the first one that relates with the current capture
7690           // (there is a set of entries for each capture).
7691           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7692               MapType, MapModifiers, IsImplicit,
7693               !IsExpressionFirstInfo || RequiresReference,
7694               IsCaptureFirstInfo && !RequiresReference);
7695 
7696           if (!IsExpressionFirstInfo) {
7697             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7698             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7699             if (IsPointer)
7700               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
7701                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
7702 
7703             if (ShouldBeMemberOf) {
7704               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7705               // should be later updated with the correct value of MEMBER_OF.
7706               Flags |= OMP_MAP_MEMBER_OF;
7707               // From now on, all subsequent PTR_AND_OBJ entries should not be
7708               // marked as MEMBER_OF.
7709               ShouldBeMemberOf = false;
7710             }
7711           }
7712 
7713           Types.push_back(Flags);
7714         }
7715 
7716         // If we have encountered a member expression so far, keep track of the
7717         // mapped member. If the parent is "*this", then the value declaration
7718         // is nullptr.
7719         if (EncounteredME) {
7720           const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl());
7721           unsigned FieldIndex = FD->getFieldIndex();
7722 
7723           // Update info about the lowest and highest elements for this struct
7724           if (!PartialStruct.Base.isValid()) {
7725             PartialStruct.LowestElem = {FieldIndex, LB};
7726             PartialStruct.HighestElem = {FieldIndex, LB};
7727             PartialStruct.Base = BP;
7728           } else if (FieldIndex < PartialStruct.LowestElem.first) {
7729             PartialStruct.LowestElem = {FieldIndex, LB};
7730           } else if (FieldIndex > PartialStruct.HighestElem.first) {
7731             PartialStruct.HighestElem = {FieldIndex, LB};
7732           }
7733         }
7734 
7735         // If we have a final array section, we are done with this expression.
7736         if (IsFinalArraySection)
7737           break;
7738 
7739         // The pointer becomes the base for the next element.
7740         if (Next != CE)
7741           BP = LB;
7742 
7743         IsExpressionFirstInfo = false;
7744         IsCaptureFirstInfo = false;
7745       }
7746     }
7747   }
7748 
7749   /// Return the adjusted map modifiers if the declaration a capture refers to
7750   /// appears in a first-private clause. This is expected to be used only with
7751   /// directives that start with 'target'.
7752   MappableExprsHandler::OpenMPOffloadMappingFlags
7753   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7754     assert(Cap.capturesVariable() && "Expected capture by reference only!");
7755 
7756     // A first private variable captured by reference will use only the
7757     // 'private ptr' and 'map to' flag. Return the right flags if the captured
7758     // declaration is known as first-private in this handler.
7759     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7760       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
7761           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
7762         return MappableExprsHandler::OMP_MAP_ALWAYS |
7763                MappableExprsHandler::OMP_MAP_TO;
7764       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7765         return MappableExprsHandler::OMP_MAP_TO |
7766                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
7767       return MappableExprsHandler::OMP_MAP_PRIVATE |
7768              MappableExprsHandler::OMP_MAP_TO;
7769     }
7770     return MappableExprsHandler::OMP_MAP_TO |
7771            MappableExprsHandler::OMP_MAP_FROM;
7772   }
7773 
7774   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
7775     // Rotate by getFlagMemberOffset() bits.
7776     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
7777                                                   << getFlagMemberOffset());
7778   }
7779 
7780   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
7781                                      OpenMPOffloadMappingFlags MemberOfFlag) {
7782     // If the entry is PTR_AND_OBJ but has not been marked with the special
7783     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
7784     // marked as MEMBER_OF.
7785     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
7786         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
7787       return;
7788 
7789     // Reset the placeholder value to prepare the flag for the assignment of the
7790     // proper MEMBER_OF value.
7791     Flags &= ~OMP_MAP_MEMBER_OF;
7792     Flags |= MemberOfFlag;
7793   }
7794 
7795   void getPlainLayout(const CXXRecordDecl *RD,
7796                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7797                       bool AsBase) const {
7798     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7799 
7800     llvm::StructType *St =
7801         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7802 
7803     unsigned NumElements = St->getNumElements();
7804     llvm::SmallVector<
7805         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7806         RecordLayout(NumElements);
7807 
7808     // Fill bases.
7809     for (const auto &I : RD->bases()) {
7810       if (I.isVirtual())
7811         continue;
7812       const auto *Base = I.getType()->getAsCXXRecordDecl();
7813       // Ignore empty bases.
7814       if (Base->isEmpty() || CGF.getContext()
7815                                  .getASTRecordLayout(Base)
7816                                  .getNonVirtualSize()
7817                                  .isZero())
7818         continue;
7819 
7820       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7821       RecordLayout[FieldIndex] = Base;
7822     }
7823     // Fill in virtual bases.
7824     for (const auto &I : RD->vbases()) {
7825       const auto *Base = I.getType()->getAsCXXRecordDecl();
7826       // Ignore empty bases.
7827       if (Base->isEmpty())
7828         continue;
7829       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7830       if (RecordLayout[FieldIndex])
7831         continue;
7832       RecordLayout[FieldIndex] = Base;
7833     }
7834     // Fill in all the fields.
7835     assert(!RD->isUnion() && "Unexpected union.");
7836     for (const auto *Field : RD->fields()) {
7837       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
7838       // will fill in later.)
7839       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
7840         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
7841         RecordLayout[FieldIndex] = Field;
7842       }
7843     }
7844     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
7845              &Data : RecordLayout) {
7846       if (Data.isNull())
7847         continue;
7848       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
7849         getPlainLayout(Base, Layout, /*AsBase=*/true);
7850       else
7851         Layout.push_back(Data.get<const FieldDecl *>());
7852     }
7853   }
7854 
7855 public:
7856   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
7857       : CurDir(&Dir), CGF(CGF) {
7858     // Extract firstprivate clause information.
7859     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
7860       for (const auto *D : C->varlists())
7861         FirstPrivateDecls.try_emplace(
7862             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
7863     // Extract device pointer clause information.
7864     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
7865       for (auto L : C->component_lists())
7866         DevPointersMap[L.first].push_back(L.second);
7867   }
7868 
7869   /// Constructor for the declare mapper directive.
7870   MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
7871       : CurDir(&Dir), CGF(CGF) {}
7872 
7873   /// Generate code for the combined entry if we have a partially mapped struct
7874   /// and take care of the mapping flags of the arguments corresponding to
7875   /// individual struct members.
7876   void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
7877                          MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7878                          MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
7879                          const StructRangeInfoTy &PartialStruct) const {
7880     // Base is the base of the struct
7881     BasePointers.push_back(PartialStruct.Base.getPointer());
7882     // Pointer is the address of the lowest element
7883     llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
7884     Pointers.push_back(LB);
7885     // Size is (addr of {highest+1} element) - (addr of lowest element)
7886     llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
7887     llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
7888     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
7889     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
7890     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
7891     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
7892                                                   /*isSigned=*/false);
7893     Sizes.push_back(Size);
7894     // Map type is always TARGET_PARAM
7895     Types.push_back(OMP_MAP_TARGET_PARAM);
7896     // Remove TARGET_PARAM flag from the first element
7897     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
7898 
7899     // All other current entries will be MEMBER_OF the combined entry
7900     // (except for PTR_AND_OBJ entries which do not have a placeholder value
7901     // 0xFFFF in the MEMBER_OF field).
7902     OpenMPOffloadMappingFlags MemberOfFlag =
7903         getMemberOfFlag(BasePointers.size() - 1);
7904     for (auto &M : CurTypes)
7905       setCorrectMemberOfFlag(M, MemberOfFlag);
7906   }
7907 
7908   /// Generate all the base pointers, section pointers, sizes and map
7909   /// types for the extracted mappable expressions. Also, for each item that
7910   /// relates with a device pointer, a pair of the relevant declaration and
7911   /// index where it occurs is appended to the device pointers info array.
7912   void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
7913                        MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7914                        MapFlagsArrayTy &Types) const {
7915     // We have to process the component lists that relate with the same
7916     // declaration in a single chunk so that we can generate the map flags
7917     // correctly. Therefore, we organize all lists in a map.
7918     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
7919 
7920     // Helper function to fill the information map for the different supported
7921     // clauses.
7922     auto &&InfoGen = [&Info](
7923         const ValueDecl *D,
7924         OMPClauseMappableExprCommon::MappableExprComponentListRef L,
7925         OpenMPMapClauseKind MapType,
7926         ArrayRef<OpenMPMapModifierKind> MapModifiers,
7927         bool ReturnDevicePointer, bool IsImplicit) {
7928       const ValueDecl *VD =
7929           D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
7930       Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
7931                             IsImplicit);
7932     };
7933 
7934     assert(CurDir.is<const OMPExecutableDirective *>() &&
7935            "Expect a executable directive");
7936     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
7937     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
7938       for (const auto L : C->component_lists()) {
7939         InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
7940             /*ReturnDevicePointer=*/false, C->isImplicit());
7941       }
7942     for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
7943       for (const auto L : C->component_lists()) {
7944         InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
7945             /*ReturnDevicePointer=*/false, C->isImplicit());
7946       }
7947     for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
7948       for (const auto L : C->component_lists()) {
7949         InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
7950             /*ReturnDevicePointer=*/false, C->isImplicit());
7951       }
7952 
    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before. If
    // there is no map information and the pointer is a struct member, then we
    // defer the emission of that entry until the whole struct has been
    // processed.
7959     llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
7960         DeferredInfo;
7961 
7962     for (const auto *C :
7963          CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
7964       for (const auto L : C->component_lists()) {
7965         assert(!L.second.empty() && "Not expecting empty list of components!");
7966         const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
7967         VD = cast<ValueDecl>(VD->getCanonicalDecl());
7968         const Expr *IE = L.second.back().getAssociatedExpression();
7969         // If the first component is a member expression, we have to look into
7970         // 'this', which maps to null in the map of map information. Otherwise
7971         // look directly for the information.
7972         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
7973 
7974         // We potentially have map information for this declaration already.
7975         // Look for the first set of components that refer to it.
7976         if (It != Info.end()) {
7977           auto CI = std::find_if(
7978               It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
7979                 return MI.Components.back().getAssociatedDeclaration() == VD;
7980               });
7981           // If we found a map entry, signal that the pointer has to be returned
7982           // and move on to the next declaration.
7983           if (CI != It->second.end()) {
7984             CI->ReturnDevicePointer = true;
7985             continue;
7986           }
7987         }
7988 
7989         // We didn't find any match in our map information - generate a zero
7990         // size array section - if the pointer is a struct member we defer this
7991         // action until the whole struct has been processed.
7992         if (isa<MemberExpr>(IE)) {
7993           // Insert the pointer into Info to be processed by
7994           // generateInfoForComponentList. Because it is a member pointer
7995           // without a pointee, no entry will be generated for it, therefore
7996           // we need to generate one after the whole struct has been processed.
7997           // Nonetheless, generateInfoForComponentList must be called to take
7998           // the pointer into account for the calculation of the range of the
7999           // partial struct.
8000           InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
8001                   /*ReturnDevicePointer=*/false, C->isImplicit());
8002           DeferredInfo[nullptr].emplace_back(IE, VD);
8003         } else {
8004           llvm::Value *Ptr =
8005               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8006           BasePointers.emplace_back(Ptr, VD);
8007           Pointers.push_back(Ptr);
8008           Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8009           Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
8010         }
8011       }
8012     }
8013 
8014     for (const auto &M : Info) {
8015       // We need to know when we generate information for the first component
8016       // associated with a capture, because the mapping flags depend on it.
8017       bool IsFirstComponentList = true;
8018 
8019       // Temporary versions of arrays
8020       MapBaseValuesArrayTy CurBasePointers;
8021       MapValuesArrayTy CurPointers;
8022       MapValuesArrayTy CurSizes;
8023       MapFlagsArrayTy CurTypes;
8024       StructRangeInfoTy PartialStruct;
8025 
8026       for (const MapInfo &L : M.second) {
8027         assert(!L.Components.empty() &&
8028                "Not expecting declaration with no component lists.");
8029 
8030         // Remember the current base pointer index.
8031         unsigned CurrentBasePointersIdx = CurBasePointers.size();
8032         generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
8033                                      CurBasePointers, CurPointers, CurSizes,
8034                                      CurTypes, PartialStruct,
8035                                      IsFirstComponentList, L.IsImplicit);
8036 
8037         // If this entry relates with a device pointer, set the relevant
8038         // declaration and add the 'return pointer' flag.
8039         if (L.ReturnDevicePointer) {
8040           assert(CurBasePointers.size() > CurrentBasePointersIdx &&
8041                  "Unexpected number of mapped base pointers.");
8042 
8043           const ValueDecl *RelevantVD =
8044               L.Components.back().getAssociatedDeclaration();
8045           assert(RelevantVD &&
8046                  "No relevant declaration related with device pointer??");
8047 
8048           CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
8049           CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8050         }
8051         IsFirstComponentList = false;
8052       }
8053 
8054       // Append any pending zero-length pointers which are struct members and
8055       // used with use_device_ptr.
8056       auto CI = DeferredInfo.find(M.first);
8057       if (CI != DeferredInfo.end()) {
8058         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8059           llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8060           llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
8061               this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
8062           CurBasePointers.emplace_back(BasePtr, L.VD);
8063           CurPointers.push_back(Ptr);
8064           CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
8065           // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
8066           // value MEMBER_OF=FFFF so that the entry is later updated with the
8067           // correct value of MEMBER_OF.
8068           CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8069                              OMP_MAP_MEMBER_OF);
8070         }
8071       }
8072 
8073       // If there is an entry in PartialStruct it means we have a struct with
8074       // individual members mapped. Emit an extra combined entry.
8075       if (PartialStruct.Base.isValid())
8076         emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8077                           PartialStruct);
8078 
8079       // We need to append the results of this capture to what we already have.
8080       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8081       Pointers.append(CurPointers.begin(), CurPointers.end());
8082       Sizes.append(CurSizes.begin(), CurSizes.end());
8083       Types.append(CurTypes.begin(), CurTypes.end());
8084     }
8085   }
8086 
8087   /// Generate all the base pointers, section pointers, sizes and map types for
8088   /// the extracted map clauses of user-defined mapper.
8089   void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers,
8090                                 MapValuesArrayTy &Pointers,
8091                                 MapValuesArrayTy &Sizes,
8092                                 MapFlagsArrayTy &Types) const {
8093     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8094            "Expect a declare mapper directive");
8095     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8096     // We have to process the component lists that relate with the same
8097     // declaration in a single chunk so that we can generate the map flags
8098     // correctly. Therefore, we organize all lists in a map.
8099     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
8100 
8101     // Helper function to fill the information map for the different supported
8102     // clauses.
8103     auto &&InfoGen = [&Info](
8104         const ValueDecl *D,
8105         OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8106         OpenMPMapClauseKind MapType,
8107         ArrayRef<OpenMPMapModifierKind> MapModifiers,
8108         bool ReturnDevicePointer, bool IsImplicit) {
8109       const ValueDecl *VD =
8110           D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
8111       Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
8112                             IsImplicit);
8113     };
8114 
8115     for (const auto *C : CurMapperDir->clauselists()) {
8116       const auto *MC = cast<OMPMapClause>(C);
8117       for (const auto L : MC->component_lists()) {
8118         InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(),
8119                 /*ReturnDevicePointer=*/false, MC->isImplicit());
8120       }
8121     }
8122 
8123     for (const auto &M : Info) {
8124       // We need to know when we generate information for the first component
8125       // associated with a capture, because the mapping flags depend on it.
8126       bool IsFirstComponentList = true;
8127 
8128       // Temporary versions of arrays
8129       MapBaseValuesArrayTy CurBasePointers;
8130       MapValuesArrayTy CurPointers;
8131       MapValuesArrayTy CurSizes;
8132       MapFlagsArrayTy CurTypes;
8133       StructRangeInfoTy PartialStruct;
8134 
8135       for (const MapInfo &L : M.second) {
8136         assert(!L.Components.empty() &&
8137                "Not expecting declaration with no component lists.");
8138         generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
8139                                      CurBasePointers, CurPointers, CurSizes,
8140                                      CurTypes, PartialStruct,
8141                                      IsFirstComponentList, L.IsImplicit);
8142         IsFirstComponentList = false;
8143       }
8144 
8145       // If there is an entry in PartialStruct it means we have a struct with
8146       // individual members mapped. Emit an extra combined entry.
8147       if (PartialStruct.Base.isValid())
8148         emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8149                           PartialStruct);
8150 
8151       // We need to append the results of this capture to what we already have.
8152       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8153       Pointers.append(CurPointers.begin(), CurPointers.end());
8154       Sizes.append(CurSizes.begin(), CurSizes.end());
8155       Types.append(CurTypes.begin(), CurTypes.end());
8156     }
8157   }
8158 
8159   /// Emit capture info for lambdas for variables captured by reference.
8160   void generateInfoForLambdaCaptures(
8161       const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
8162       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
8163       MapFlagsArrayTy &Types,
8164       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8165     const auto *RD = VD->getType()
8166                          .getCanonicalType()
8167                          .getNonReferenceType()
8168                          ->getAsCXXRecordDecl();
8169     if (!RD || !RD->isLambda())
8170       return;
8171     Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
8172     LValue VDLVal = CGF.MakeAddrLValue(
8173         VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
8174     llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
8175     FieldDecl *ThisCapture = nullptr;
8176     RD->getCaptureFields(Captures, ThisCapture);
8177     if (ThisCapture) {
8178       LValue ThisLVal =
8179           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8180       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8181       LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8182                                  VDLVal.getPointer(CGF));
8183       BasePointers.push_back(ThisLVal.getPointer(CGF));
8184       Pointers.push_back(ThisLValVal.getPointer(CGF));
8185       Sizes.push_back(
8186           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8187                                     CGF.Int64Ty, /*isSigned=*/true));
8188       Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8189                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8190     }
8191     for (const LambdaCapture &LC : RD->captures()) {
8192       if (!LC.capturesVariable())
8193         continue;
8194       const VarDecl *VD = LC.getCapturedVar();
8195       if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8196         continue;
8197       auto It = Captures.find(VD);
8198       assert(It != Captures.end() && "Found lambda capture without field.");
8199       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8200       if (LC.getCaptureKind() == LCK_ByRef) {
8201         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8202         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8203                                    VDLVal.getPointer(CGF));
8204         BasePointers.push_back(VarLVal.getPointer(CGF));
8205         Pointers.push_back(VarLValVal.getPointer(CGF));
8206         Sizes.push_back(CGF.Builder.CreateIntCast(
8207             CGF.getTypeSize(
8208                 VD->getType().getCanonicalType().getNonReferenceType()),
8209             CGF.Int64Ty, /*isSigned=*/true));
8210       } else {
8211         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8212         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8213                                    VDLVal.getPointer(CGF));
8214         BasePointers.push_back(VarLVal.getPointer(CGF));
8215         Pointers.push_back(VarRVal.getScalarVal());
8216         Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8217       }
8218       Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8219                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8220     }
8221   }
8222 
8223   /// Set correct indices for lambdas captures.
8224   void adjustMemberOfForLambdaCaptures(
8225       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8226       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8227       MapFlagsArrayTy &Types) const {
8228     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8229       // Set correct member_of idx for all implicit lambda captures.
8230       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8231                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8232         continue;
8233       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8234       assert(BasePtr && "Unable to find base lambda address.");
8235       int TgtIdx = -1;
8236       for (unsigned J = I; J > 0; --J) {
8237         unsigned Idx = J - 1;
8238         if (Pointers[Idx] != BasePtr)
8239           continue;
8240         TgtIdx = Idx;
8241         break;
8242       }
8243       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8244       // All other current entries will be MEMBER_OF the combined entry
8245       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8246       // 0xFFFF in the MEMBER_OF field).
8247       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8248       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8249     }
8250   }
8251 
  /// Generate the base pointers, section pointers, sizes and map types
  /// associated to a given capture.
  ///
  /// \a Cap must not capture a variable array type (asserted). If the
  /// captured declaration is found in DevPointersMap, a single entry is
  /// emitted that uses \a Arg as both base pointer and pointer. Otherwise the
  /// component lists of the current directive's map clauses that refer to the
  /// declaration are processed; generated entries are appended to
  /// \a BasePointers, \a Pointers, \a Sizes and \a Types, and
  /// \a PartialStruct is forwarded to generateInfoForComponentList for every
  /// list.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg,
                              MapBaseValuesArrayTy &BasePointers,
                              MapValuesArrayTy &Pointers,
                              MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // A capture of 'this' is represented by a null declaration; any other
    // capture by the canonical declaration of the captured variable.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      BasePointers.emplace_back(Arg, VD);
      Pointers.push_back(Arg);
      Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
      return;
    }

    // One record per component list: the list itself, the map type and
    // modifiers of the clause it came from, and whether that clause is
    // implicit.
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    // Gather the component lists of all map clauses that refer to VD.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      for (const auto L : C->decl_component_lists(VD)) {
        assert(L.first == VD &&
               "We got information for the wrong declaration??");
        assert(!L.second.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(L.second, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit());
      }
    }

    // Find overlapping elements (including the offset from the base element).
    // The map is keyed by the address of the base record inside
    // DeclComponentLists; the value holds the component lists that overlap it.
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      // Count is bumped first so that the slice below starts at the record
      // following L: every pair of lists is compared exactly once.
      ++Count;
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        // Only Components1 is needed here; the other tuple fields land in the
        // outer locals, which are not read again in this loop.
        std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
        // Walk both lists in lock-step, starting from their last components,
        // for as long as the components agree.
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head of
        // the components list.
        if (CI == CE || SI == SE) {
          assert((CI != CE || SI != SE) &&
                 "Unexpected full match of the mapping components.");
          // The list that was exhausted becomes the base record; the other
          // list is recorded as an element overlapping it.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    // Layout is only filled in when there is overlapped data; the comparator
    // below consults it for fields that have different parents.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // NOTE(review): VD is null for a capture of 'this'; this dereference
      // presumably relies on no overlapped data being found in that case --
      // confirm.
      if (const auto *CRD =
              VD->getType().getCanonicalType()->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            // Skip the common part of both lists, comparing from the last
            // component towards the first.
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // The lists diverge at a pair of fields. Fields of the same
            // parent are ordered by field index; otherwise the field that
            // appears first in Layout sorts first.
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            // NOTE(review): It is dereferenced without a check against
            // Layout.end(); assumes at least one of the two fields is present
            // in Layout -- confirm.
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Go through all of the elements that have overlapped elements. Each of
    // them is emitted with IsFirstComponentList set to true and is given its
    // (sorted) overlapped component lists.
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      bool IsFirstComponentList = true;
      generateInfoForComponentList(MapType, MapModifiers, Components,
                                   BasePointers, Pointers, Sizes, Types,
                                   PartialStruct, IsFirstComponentList,
                                   IsImplicit, OverlappedComponents);
    }
    // Go through other elements without overlapped elements.
    // We need to know when we are generating information for the first
    // component list associated with a capture, because the mapping flags
    // depend on it. The flag starts out true only when the loop above emitted
    // nothing, and it is cleared after the first record visited, whether or
    // not that record is emitted here.
    bool IsFirstComponentList = OverlappedData.empty();
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      // Records that served as a base in OverlappedData were already emitted.
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, Components,
                                     BasePointers, Pointers, Sizes, Types,
                                     PartialStruct, IsFirstComponentList,
                                     IsImplicit);
      IsFirstComponentList = false;
    }
  }
8429 
8430   /// Generate the base pointers, section pointers, sizes and map types
8431   /// associated with the declare target link variables.
8432   void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
8433                                         MapValuesArrayTy &Pointers,
8434                                         MapValuesArrayTy &Sizes,
8435                                         MapFlagsArrayTy &Types) const {
8436     assert(CurDir.is<const OMPExecutableDirective *>() &&
8437            "Expect a executable directive");
8438     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8439     // Map other list items in the map clause which are not captured variables
8440     // but "declare target link" global variables.
8441     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8442       for (const auto L : C->component_lists()) {
8443         if (!L.first)
8444           continue;
8445         const auto *VD = dyn_cast<VarDecl>(L.first);
8446         if (!VD)
8447           continue;
8448         llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
8449             OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
8450         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8451             !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
8452           continue;
8453         StructRangeInfoTy PartialStruct;
8454         generateInfoForComponentList(
8455             C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
8456             Pointers, Sizes, Types, PartialStruct,
8457             /*IsFirstComponentList=*/true, C->isImplicit());
8458         assert(!PartialStruct.Base.isValid() &&
8459                "No partial structs for declare target link expected.");
8460       }
8461     }
8462   }
8463 
8464   /// Generate the default map information for a given capture \a CI,
8465   /// record field declaration \a RI and captured value \a CV.
8466   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
8467                               const FieldDecl &RI, llvm::Value *CV,
8468                               MapBaseValuesArrayTy &CurBasePointers,
8469                               MapValuesArrayTy &CurPointers,
8470                               MapValuesArrayTy &CurSizes,
8471                               MapFlagsArrayTy &CurMapTypes) const {
8472     bool IsImplicit = true;
8473     // Do the default mapping.
8474     if (CI.capturesThis()) {
8475       CurBasePointers.push_back(CV);
8476       CurPointers.push_back(CV);
8477       const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
8478       CurSizes.push_back(
8479           CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
8480                                     CGF.Int64Ty, /*isSigned=*/true));
8481       // Default map type.
8482       CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
8483     } else if (CI.capturesVariableByCopy()) {
8484       CurBasePointers.push_back(CV);
8485       CurPointers.push_back(CV);
8486       if (!RI.getType()->isAnyPointerType()) {
8487         // We have to signal to the runtime captures passed by value that are
8488         // not pointers.
8489         CurMapTypes.push_back(OMP_MAP_LITERAL);
8490         CurSizes.push_back(CGF.Builder.CreateIntCast(
8491             CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
8492       } else {
8493         // Pointers are implicitly mapped with a zero size and no flags
8494         // (other than first map that is added for all implicit maps).
8495         CurMapTypes.push_back(OMP_MAP_NONE);
8496         CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8497       }
8498       const VarDecl *VD = CI.getCapturedVar();
8499       auto I = FirstPrivateDecls.find(VD);
8500       if (I != FirstPrivateDecls.end())
8501         IsImplicit = I->getSecond();
8502     } else {
8503       assert(CI.capturesVariable() && "Expected captured reference.");
8504       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
8505       QualType ElementType = PtrTy->getPointeeType();
8506       CurSizes.push_back(CGF.Builder.CreateIntCast(
8507           CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
8508       // The default map type for a scalar/complex type is 'to' because by
8509       // default the value doesn't have to be retrieved. For an aggregate
8510       // type, the default is 'tofrom'.
8511       CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
8512       const VarDecl *VD = CI.getCapturedVar();
8513       auto I = FirstPrivateDecls.find(VD);
8514       if (I != FirstPrivateDecls.end() &&
8515           VD->getType().isConstant(CGF.getContext())) {
8516         llvm::Constant *Addr =
8517             CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
8518         // Copy the value of the original variable to the new global copy.
8519         CGF.Builder.CreateMemCpy(
8520             CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
8521             Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
8522             CurSizes.back(), /*IsVolatile=*/false);
8523         // Use new global variable as the base pointers.
8524         CurBasePointers.push_back(Addr);
8525         CurPointers.push_back(Addr);
8526       } else {
8527         CurBasePointers.push_back(CV);
8528         if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
8529           Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
8530               CV, ElementType, CGF.getContext().getDeclAlign(VD),
8531               AlignmentSource::Decl));
8532           CurPointers.push_back(PtrAddr.getPointer());
8533         } else {
8534           CurPointers.push_back(CV);
8535         }
8536       }
8537       if (I != FirstPrivateDecls.end())
8538         IsImplicit = I->getSecond();
8539     }
8540     // Every default map produces a single argument which is a target parameter.
8541     CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;
8542 
8543     // Add flag stating this is an implicit map.
8544     if (IsImplicit)
8545       CurMapTypes.back() |= OMP_MAP_IMPLICIT;
8546   }
8547 };
8548 } // anonymous namespace
8549 
8550 /// Emit the arrays used to pass the captures and map information to the
8551 /// offloading runtime library. If there is no map or capture information,
8552 /// return nullptr by reference.
8553 static void
8554 emitOffloadingArrays(CodeGenFunction &CGF,
8555                      MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
8556                      MappableExprsHandler::MapValuesArrayTy &Pointers,
8557                      MappableExprsHandler::MapValuesArrayTy &Sizes,
8558                      MappableExprsHandler::MapFlagsArrayTy &MapTypes,
8559                      CGOpenMPRuntime::TargetDataInfo &Info) {
8560   CodeGenModule &CGM = CGF.CGM;
8561   ASTContext &Ctx = CGF.getContext();
8562 
8563   // Reset the array information.
8564   Info.clearArrayInfo();
8565   Info.NumberOfPtrs = BasePointers.size();
8566 
8567   if (Info.NumberOfPtrs) {
8568     // Detect if we have any capture size requiring runtime evaluation of the
8569     // size so that a constant array could be eventually used.
8570     bool hasRuntimeEvaluationCaptureSize = false;
8571     for (llvm::Value *S : Sizes)
8572       if (!isa<llvm::Constant>(S)) {
8573         hasRuntimeEvaluationCaptureSize = true;
8574         break;
8575       }
8576 
8577     llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
8578     QualType PointerArrayType = Ctx.getConstantArrayType(
8579         Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
8580         /*IndexTypeQuals=*/0);
8581 
8582     Info.BasePointersArray =
8583         CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
8584     Info.PointersArray =
8585         CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
8586 
8587     // If we don't have any VLA types or other types that require runtime
8588     // evaluation, we can use a constant array for the map sizes, otherwise we
8589     // need to fill up the arrays as we do for the pointers.
8590     QualType Int64Ty =
8591         Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
8592     if (hasRuntimeEvaluationCaptureSize) {
8593       QualType SizeArrayType = Ctx.getConstantArrayType(
8594           Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
8595           /*IndexTypeQuals=*/0);
8596       Info.SizesArray =
8597           CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
8598     } else {
8599       // We expect all the sizes to be constant, so we collect them to create
8600       // a constant array.
8601       SmallVector<llvm::Constant *, 16> ConstSizes;
8602       for (llvm::Value *S : Sizes)
8603         ConstSizes.push_back(cast<llvm::Constant>(S));
8604 
8605       auto *SizesArrayInit = llvm::ConstantArray::get(
8606           llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
8607       std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
8608       auto *SizesArrayGbl = new llvm::GlobalVariable(
8609           CGM.getModule(), SizesArrayInit->getType(),
8610           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8611           SizesArrayInit, Name);
8612       SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8613       Info.SizesArray = SizesArrayGbl;
8614     }
8615 
8616     // The map types are always constant so we don't need to generate code to
8617     // fill arrays. Instead, we create an array constant.
8618     SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
8619     llvm::copy(MapTypes, Mapping.begin());
8620     llvm::Constant *MapTypesArrayInit =
8621         llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
8622     std::string MaptypesName =
8623         CGM.getOpenMPRuntime().getName({"offload_maptypes"});
8624     auto *MapTypesArrayGbl = new llvm::GlobalVariable(
8625         CGM.getModule(), MapTypesArrayInit->getType(),
8626         /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8627         MapTypesArrayInit, MaptypesName);
8628     MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8629     Info.MapTypesArray = MapTypesArrayGbl;
8630 
8631     for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
8632       llvm::Value *BPVal = *BasePointers[I];
8633       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
8634           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8635           Info.BasePointersArray, 0, I);
8636       BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8637           BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
8638       Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8639       CGF.Builder.CreateStore(BPVal, BPAddr);
8640 
8641       if (Info.requiresDevicePointerInfo())
8642         if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
8643           Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
8644 
8645       llvm::Value *PVal = Pointers[I];
8646       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
8647           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8648           Info.PointersArray, 0, I);
8649       P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8650           P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
8651       Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8652       CGF.Builder.CreateStore(PVal, PAddr);
8653 
8654       if (hasRuntimeEvaluationCaptureSize) {
8655         llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
8656             llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8657             Info.SizesArray,
8658             /*Idx0=*/0,
8659             /*Idx1=*/I);
8660         Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
8661         CGF.Builder.CreateStore(
8662             CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true),
8663             SAddr);
8664       }
8665     }
8666   }
8667 }
8668 
8669 /// Emit the arguments to be passed to the runtime library based on the
8670 /// arrays of pointers, sizes and map types.
8671 static void emitOffloadingArraysArgument(
8672     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
8673     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
8674     llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
8675   CodeGenModule &CGM = CGF.CGM;
8676   if (Info.NumberOfPtrs) {
8677     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8678         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8679         Info.BasePointersArray,
8680         /*Idx0=*/0, /*Idx1=*/0);
8681     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8682         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8683         Info.PointersArray,
8684         /*Idx0=*/0,
8685         /*Idx1=*/0);
8686     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8687         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
8688         /*Idx0=*/0, /*Idx1=*/0);
8689     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8690         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8691         Info.MapTypesArray,
8692         /*Idx0=*/0,
8693         /*Idx1=*/0);
8694   } else {
8695     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8696     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8697     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8698     MapTypesArrayArg =
8699         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8700   }
8701 }
8702 
8703 /// Check for inner distribute directive.
8704 static const OMPExecutableDirective *
8705 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
8706   const auto *CS = D.getInnermostCapturedStmt();
8707   const auto *Body =
8708       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
8709   const Stmt *ChildStmt =
8710       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8711 
8712   if (const auto *NestedDir =
8713           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8714     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
8715     switch (D.getDirectiveKind()) {
8716     case OMPD_target:
8717       if (isOpenMPDistributeDirective(DKind))
8718         return NestedDir;
8719       if (DKind == OMPD_teams) {
8720         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
8721             /*IgnoreCaptured=*/true);
8722         if (!Body)
8723           return nullptr;
8724         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8725         if (const auto *NND =
8726                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8727           DKind = NND->getDirectiveKind();
8728           if (isOpenMPDistributeDirective(DKind))
8729             return NND;
8730         }
8731       }
8732       return nullptr;
8733     case OMPD_target_teams:
8734       if (isOpenMPDistributeDirective(DKind))
8735         return NestedDir;
8736       return nullptr;
8737     case OMPD_target_parallel:
8738     case OMPD_target_simd:
8739     case OMPD_target_parallel_for:
8740     case OMPD_target_parallel_for_simd:
8741       return nullptr;
8742     case OMPD_target_teams_distribute:
8743     case OMPD_target_teams_distribute_simd:
8744     case OMPD_target_teams_distribute_parallel_for:
8745     case OMPD_target_teams_distribute_parallel_for_simd:
8746     case OMPD_parallel:
8747     case OMPD_for:
8748     case OMPD_parallel_for:
8749     case OMPD_parallel_master:
8750     case OMPD_parallel_sections:
8751     case OMPD_for_simd:
8752     case OMPD_parallel_for_simd:
8753     case OMPD_cancel:
8754     case OMPD_cancellation_point:
8755     case OMPD_ordered:
8756     case OMPD_threadprivate:
8757     case OMPD_allocate:
8758     case OMPD_task:
8759     case OMPD_simd:
8760     case OMPD_sections:
8761     case OMPD_section:
8762     case OMPD_single:
8763     case OMPD_master:
8764     case OMPD_critical:
8765     case OMPD_taskyield:
8766     case OMPD_barrier:
8767     case OMPD_taskwait:
8768     case OMPD_taskgroup:
8769     case OMPD_atomic:
8770     case OMPD_flush:
8771     case OMPD_teams:
8772     case OMPD_target_data:
8773     case OMPD_target_exit_data:
8774     case OMPD_target_enter_data:
8775     case OMPD_distribute:
8776     case OMPD_distribute_simd:
8777     case OMPD_distribute_parallel_for:
8778     case OMPD_distribute_parallel_for_simd:
8779     case OMPD_teams_distribute:
8780     case OMPD_teams_distribute_simd:
8781     case OMPD_teams_distribute_parallel_for:
8782     case OMPD_teams_distribute_parallel_for_simd:
8783     case OMPD_target_update:
8784     case OMPD_declare_simd:
8785     case OMPD_declare_variant:
8786     case OMPD_declare_target:
8787     case OMPD_end_declare_target:
8788     case OMPD_declare_reduction:
8789     case OMPD_declare_mapper:
8790     case OMPD_taskloop:
8791     case OMPD_taskloop_simd:
8792     case OMPD_master_taskloop:
8793     case OMPD_master_taskloop_simd:
8794     case OMPD_parallel_master_taskloop:
8795     case OMPD_parallel_master_taskloop_simd:
8796     case OMPD_requires:
8797     case OMPD_unknown:
8798       llvm_unreachable("Unexpected directive.");
8799     }
8800   }
8801 
8802   return nullptr;
8803 }
8804 
8805 /// Emit the user-defined mapper function. The code generation follows the
8806 /// pattern in the example below.
8807 /// \code
8808 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
8809 ///                                           void *base, void *begin,
8810 ///                                           int64_t size, int64_t type) {
8811 ///   // Allocate space for an array section first.
8812 ///   if (size > 1 && !maptype.IsDelete)
8813 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
8814 ///                                 size*sizeof(Ty), clearToFrom(type));
8815 ///   // Map members.
8816 ///   for (unsigned i = 0; i < size; i++) {
8817 ///     // For each component specified by this mapper:
8818 ///     for (auto c : all_components) {
8819 ///       if (c.hasMapper())
8820 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
8821 ///                       c.arg_type);
8822 ///       else
8823 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
8824 ///                                     c.arg_begin, c.arg_size, c.arg_type);
8825 ///     }
8826 ///   }
8827 ///   // Delete the array section.
8828 ///   if (size > 1 && maptype.IsDelete)
8829 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
8830 ///                                 size*sizeof(Ty), clearToFrom(type));
8831 /// }
8832 /// \endcode
8833 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
8834                                             CodeGenFunction *CGF) {
8835   if (UDMMap.count(D) > 0)
8836     return;
8837   ASTContext &C = CGM.getContext();
8838   QualType Ty = D->getType();
8839   QualType PtrTy = C.getPointerType(Ty).withRestrict();
8840   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
8841   auto *MapperVarDecl =
8842       cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
8843   SourceLocation Loc = D->getLocation();
8844   CharUnits ElementSize = C.getTypeSizeInChars(Ty);
8845 
8846   // Prepare mapper function arguments and attributes.
8847   ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
8848                               C.VoidPtrTy, ImplicitParamDecl::Other);
8849   ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
8850                             ImplicitParamDecl::Other);
8851   ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
8852                              C.VoidPtrTy, ImplicitParamDecl::Other);
8853   ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
8854                             ImplicitParamDecl::Other);
8855   ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
8856                             ImplicitParamDecl::Other);
8857   FunctionArgList Args;
8858   Args.push_back(&HandleArg);
8859   Args.push_back(&BaseArg);
8860   Args.push_back(&BeginArg);
8861   Args.push_back(&SizeArg);
8862   Args.push_back(&TypeArg);
8863   const CGFunctionInfo &FnInfo =
8864       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
8865   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
8866   SmallString<64> TyStr;
8867   llvm::raw_svector_ostream Out(TyStr);
8868   CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
8869   std::string Name = getName({"omp_mapper", TyStr, D->getName()});
8870   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
8871                                     Name, &CGM.getModule());
8872   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
8873   Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
8874   // Start the mapper function code generation.
8875   CodeGenFunction MapperCGF(CGM);
8876   MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
8877   // Compute the starting and end addreses of array elements.
8878   llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
8879       MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
8880       C.getPointerType(Int64Ty), Loc);
8881   llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
8882       MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
8883       CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
8884   llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
8885   llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
8886       MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
8887       C.getPointerType(Int64Ty), Loc);
8888   // Prepare common arguments for array initiation and deletion.
8889   llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
8890       MapperCGF.GetAddrOfLocalVar(&HandleArg),
8891       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
8892   llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
8893       MapperCGF.GetAddrOfLocalVar(&BaseArg),
8894       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
8895   llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
8896       MapperCGF.GetAddrOfLocalVar(&BeginArg),
8897       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
8898 
8899   // Emit array initiation if this is an array section and \p MapType indicates
8900   // that memory allocation is required.
8901   llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
8902   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
8903                              ElementSize, HeadBB, /*IsInit=*/true);
8904 
8905   // Emit a for loop to iterate through SizeArg of elements and map all of them.
8906 
8907   // Emit the loop header block.
8908   MapperCGF.EmitBlock(HeadBB);
8909   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
8910   llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
8911   // Evaluate whether the initial condition is satisfied.
8912   llvm::Value *IsEmpty =
8913       MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
8914   MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
8915   llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
8916 
8917   // Emit the loop body block.
8918   MapperCGF.EmitBlock(BodyBB);
8919   llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
8920       PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
8921   PtrPHI->addIncoming(PtrBegin, EntryBB);
8922   Address PtrCurrent =
8923       Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
8924                           .getAlignment()
8925                           .alignmentOfArrayElement(ElementSize));
8926   // Privatize the declared variable of mapper to be the current array element.
8927   CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
8928   Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
8929     return MapperCGF
8930         .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
8931         .getAddress(MapperCGF);
8932   });
8933   (void)Scope.Privatize();
8934 
8935   // Get map clause information. Fill up the arrays with all mapped variables.
8936   MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
8937   MappableExprsHandler::MapValuesArrayTy Pointers;
8938   MappableExprsHandler::MapValuesArrayTy Sizes;
8939   MappableExprsHandler::MapFlagsArrayTy MapTypes;
8940   MappableExprsHandler MEHandler(*D, MapperCGF);
8941   MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes);
8942 
8943   // Call the runtime API __tgt_mapper_num_components to get the number of
8944   // pre-existing components.
8945   llvm::Value *OffloadingArgs[] = {Handle};
8946   llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
8947       createRuntimeFunction(OMPRTL__tgt_mapper_num_components), OffloadingArgs);
8948   llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
8949       PreviousSize,
8950       MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
8951 
8952   // Fill up the runtime mapper handle for all components.
8953   for (unsigned I = 0; I < BasePointers.size(); ++I) {
8954     llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
8955         *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
8956     llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
8957         Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
8958     llvm::Value *CurSizeArg = Sizes[I];
8959 
8960     // Extract the MEMBER_OF field from the map type.
8961     llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
8962     MapperCGF.EmitBlock(MemberBB);
8963     llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]);
8964     llvm::Value *Member = MapperCGF.Builder.CreateAnd(
8965         OriMapType,
8966         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
8967     llvm::BasicBlock *MemberCombineBB =
8968         MapperCGF.createBasicBlock("omp.member.combine");
8969     llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
8970     llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
8971     MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
8972     // Add the number of pre-existing components to the MEMBER_OF field if it
8973     // is valid.
8974     MapperCGF.EmitBlock(MemberCombineBB);
8975     llvm::Value *CombinedMember =
8976         MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
8977     // Do nothing if it is not a member of previous components.
8978     MapperCGF.EmitBlock(TypeBB);
8979     llvm::PHINode *MemberMapType =
8980         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
8981     MemberMapType->addIncoming(OriMapType, MemberBB);
8982     MemberMapType->addIncoming(CombinedMember, MemberCombineBB);
8983 
8984     // Combine the map type inherited from user-defined mapper with that
8985     // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
8986     // bits of the \a MapType, which is the input argument of the mapper
8987     // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
8988     // bits of MemberMapType.
8989     // [OpenMP 5.0], 1.2.6. map-type decay.
8990     //        | alloc |  to   | from  | tofrom | release | delete
8991     // ----------------------------------------------------------
8992     // alloc  | alloc | alloc | alloc | alloc  | release | delete
8993     // to     | alloc |  to   | alloc |   to   | release | delete
8994     // from   | alloc | alloc | from  |  from  | release | delete
8995     // tofrom | alloc |  to   | from  | tofrom | release | delete
8996     llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
8997         MapType,
8998         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
8999                                    MappableExprsHandler::OMP_MAP_FROM));
9000     llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
9001     llvm::BasicBlock *AllocElseBB =
9002         MapperCGF.createBasicBlock("omp.type.alloc.else");
9003     llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
9004     llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
9005     llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
9006     llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
9007     llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
9008     MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9009     // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
9010     MapperCGF.EmitBlock(AllocBB);
9011     llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
9012         MemberMapType,
9013         MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9014                                      MappableExprsHandler::OMP_MAP_FROM)));
9015     MapperCGF.Builder.CreateBr(EndBB);
9016     MapperCGF.EmitBlock(AllocElseBB);
9017     llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
9018         LeftToFrom,
9019         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
9020     MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9021     // In case of to, clear OMP_MAP_FROM.
9022     MapperCGF.EmitBlock(ToBB);
9023     llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
9024         MemberMapType,
9025         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
9026     MapperCGF.Builder.CreateBr(EndBB);
9027     MapperCGF.EmitBlock(ToElseBB);
9028     llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
9029         LeftToFrom,
9030         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
9031     MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9032     // In case of from, clear OMP_MAP_TO.
9033     MapperCGF.EmitBlock(FromBB);
9034     llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
9035         MemberMapType,
9036         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
9037     // In case of tofrom, do nothing.
9038     MapperCGF.EmitBlock(EndBB);
9039     llvm::PHINode *CurMapType =
9040         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
9041     CurMapType->addIncoming(AllocMapType, AllocBB);
9042     CurMapType->addIncoming(ToMapType, ToBB);
9043     CurMapType->addIncoming(FromMapType, FromBB);
9044     CurMapType->addIncoming(MemberMapType, ToElseBB);
9045 
9046     // TODO: call the corresponding mapper function if a user-defined mapper is
9047     // associated with this map clause.
9048     // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9049     // data structure.
9050     llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
9051                                      CurSizeArg, CurMapType};
9052     MapperCGF.EmitRuntimeCall(
9053         createRuntimeFunction(OMPRTL__tgt_push_mapper_component),
9054         OffloadingArgs);
9055   }
9056 
9057   // Update the pointer to point to the next element that needs to be mapped,
9058   // and check whether we have mapped all elements.
9059   llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
9060       PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
9061   PtrPHI->addIncoming(PtrNext, BodyBB);
9062   llvm::Value *IsDone =
9063       MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9064   llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
9065   MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9066 
9067   MapperCGF.EmitBlock(ExitBB);
9068   // Emit array deletion if this is an array section and \p MapType indicates
9069   // that deletion is required.
9070   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9071                              ElementSize, DoneBB, /*IsInit=*/false);
9072 
9073   // Emit the function exit block.
9074   MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9075   MapperCGF.FinishFunction();
9076   UDMMap.try_emplace(D, Fn);
9077   if (CGF) {
9078     auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
9079     Decls.second.push_back(D);
9080   }
9081 }
9082 
9083 /// Emit the array initialization or deletion portion for user-defined mapper
9084 /// code generation. First, it evaluates whether an array section is mapped and
9085 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9086 /// true, and \a MapType indicates to not delete this array, array
9087 /// initialization code is generated. If \a IsInit is false, and \a MapType
9088 /// indicates to not this array, array deletion code is generated.
9089 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9090     CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9091     llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9092     CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
9093   StringRef Prefix = IsInit ? ".init" : ".del";
9094 
9095   // Evaluate if this is an array section.
9096   llvm::BasicBlock *IsDeleteBB =
9097       MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"}));
9098   llvm::BasicBlock *BodyBB =
9099       MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
9100   llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
9101       Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9102   MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);
9103 
9104   // Evaluate if we are going to delete this section.
9105   MapperCGF.EmitBlock(IsDeleteBB);
9106   llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9107       MapType,
9108       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
9109   llvm::Value *DeleteCond;
9110   if (IsInit) {
9111     DeleteCond = MapperCGF.Builder.CreateIsNull(
9112         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9113   } else {
9114     DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9115         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9116   }
9117   MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);
9118 
9119   MapperCGF.EmitBlock(BodyBB);
9120   // Get the array size by multiplying element size and element number (i.e., \p
9121   // Size).
9122   llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9123       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9124   // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
9125   // memory allocation/deletion purpose only.
9126   llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
9127       MapType,
9128       MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9129                                    MappableExprsHandler::OMP_MAP_FROM)));
9130   // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9131   // data structure.
9132   llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
9133   MapperCGF.EmitRuntimeCall(
9134       createRuntimeFunction(OMPRTL__tgt_push_mapper_component), OffloadingArgs);
9135 }
9136 
9137 void CGOpenMPRuntime::emitTargetNumIterationsCall(
9138     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9139     llvm::Value *DeviceID,
9140     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9141                                      const OMPLoopDirective &D)>
9142         SizeEmitter) {
9143   OpenMPDirectiveKind Kind = D.getDirectiveKind();
9144   const OMPExecutableDirective *TD = &D;
9145   // Get nested teams distribute kind directive, if any.
9146   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
9147     TD = getNestedDistributeDirective(CGM.getContext(), D);
9148   if (!TD)
9149     return;
9150   const auto *LD = cast<OMPLoopDirective>(TD);
9151   auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF,
9152                                                      PrePostActionTy &) {
9153     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
9154       llvm::Value *Args[] = {DeviceID, NumIterations};
9155       CGF.EmitRuntimeCall(
9156           createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
9157     }
9158   };
9159   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
9160 }
9161 
9162 void CGOpenMPRuntime::emitTargetCall(
9163     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9164     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
9165     const Expr *Device,
9166     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9167                                      const OMPLoopDirective &D)>
9168         SizeEmitter) {
9169   if (!CGF.HaveInsertPoint())
9170     return;
9171 
9172   assert(OutlinedFn && "Invalid outlined function!");
9173 
9174   const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
9175   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
9176   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
9177   auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
9178                                             PrePostActionTy &) {
9179     CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9180   };
9181   emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
9182 
9183   CodeGenFunction::OMPTargetDataInfo InputInfo;
9184   llvm::Value *MapTypesArray = nullptr;
9185   // Fill up the pointer arrays and transfer execution to the device.
9186   auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
9187                     &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
9188                     SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
9189     // On top of the arrays that were filled up, the target offloading call
9190     // takes as arguments the device id as well as the host pointer. The host
9191     // pointer is used by the runtime library to identify the current target
9192     // region, so it only has to be unique and not necessarily point to
9193     // anything. It could be the pointer to the outlined function that
9194     // implements the target region, but we aren't using that so that the
9195     // compiler doesn't need to keep that, and could therefore inline the host
9196     // function if proven worthwhile during optimization.
9197 
9198     // From this point on, we need to have an ID of the target region defined.
9199     assert(OutlinedFnID && "Invalid outlined function ID!");
9200 
9201     // Emit device ID if any.
9202     llvm::Value *DeviceID;
9203     if (Device) {
9204       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
9205                                            CGF.Int64Ty, /*isSigned=*/true);
9206     } else {
9207       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9208     }
9209 
9210     // Emit the number of elements in the offloading arrays.
9211     llvm::Value *PointerNum =
9212         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
9213 
9214     // Return value of the runtime offloading call.
9215     llvm::Value *Return;
9216 
9217     llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
9218     llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
9219 
9220     // Emit tripcount for the target loop-based directive.
9221     emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
9222 
9223     bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
9224     // The target region is an outlined function launched by the runtime
9225     // via calls __tgt_target() or __tgt_target_teams().
9226     //
9227     // __tgt_target() launches a target region with one team and one thread,
9228     // executing a serial region.  This master thread may in turn launch
9229     // more threads within its team upon encountering a parallel region,
9230     // however, no additional teams can be launched on the device.
9231     //
9232     // __tgt_target_teams() launches a target region with one or more teams,
9233     // each with one or more threads.  This call is required for target
9234     // constructs such as:
9235     //  'target teams'
9236     //  'target' / 'teams'
9237     //  'target teams distribute parallel for'
9238     //  'target parallel'
9239     // and so on.
9240     //
9241     // Note that on the host and CPU targets, the runtime implementation of
9242     // these calls simply call the outlined function without forking threads.
9243     // The outlined functions themselves have runtime calls to
9244     // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
9245     // the compiler in emitTeamsCall() and emitParallelCall().
9246     //
9247     // In contrast, on the NVPTX target, the implementation of
9248     // __tgt_target_teams() launches a GPU kernel with the requested number
9249     // of teams and threads so no additional calls to the runtime are required.
9250     if (NumTeams) {
9251       // If we have NumTeams defined this means that we have an enclosed teams
9252       // region. Therefore we also expect to have NumThreads defined. These two
9253       // values should be defined in the presence of a teams directive,
9254       // regardless of having any clauses associated. If the user is using teams
9255       // but no clauses, these two values will be the default that should be
9256       // passed to the runtime library - a 32-bit integer with the value zero.
9257       assert(NumThreads && "Thread limit expression should be available along "
9258                            "with number of teams.");
9259       llvm::Value *OffloadingArgs[] = {DeviceID,
9260                                        OutlinedFnID,
9261                                        PointerNum,
9262                                        InputInfo.BasePointersArray.getPointer(),
9263                                        InputInfo.PointersArray.getPointer(),
9264                                        InputInfo.SizesArray.getPointer(),
9265                                        MapTypesArray,
9266                                        NumTeams,
9267                                        NumThreads};
9268       Return = CGF.EmitRuntimeCall(
9269           createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
9270                                           : OMPRTL__tgt_target_teams),
9271           OffloadingArgs);
9272     } else {
9273       llvm::Value *OffloadingArgs[] = {DeviceID,
9274                                        OutlinedFnID,
9275                                        PointerNum,
9276                                        InputInfo.BasePointersArray.getPointer(),
9277                                        InputInfo.PointersArray.getPointer(),
9278                                        InputInfo.SizesArray.getPointer(),
9279                                        MapTypesArray};
9280       Return = CGF.EmitRuntimeCall(
9281           createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
9282                                           : OMPRTL__tgt_target),
9283           OffloadingArgs);
9284     }
9285 
9286     // Check the error code and execute the host version if required.
9287     llvm::BasicBlock *OffloadFailedBlock =
9288         CGF.createBasicBlock("omp_offload.failed");
9289     llvm::BasicBlock *OffloadContBlock =
9290         CGF.createBasicBlock("omp_offload.cont");
9291     llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
9292     CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
9293 
9294     CGF.EmitBlock(OffloadFailedBlock);
9295     if (RequiresOuterTask) {
9296       CapturedVars.clear();
9297       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9298     }
9299     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
9300     CGF.EmitBranch(OffloadContBlock);
9301 
9302     CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
9303   };
9304 
9305   // Notify that the host version must be executed.
9306   auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
9307                     RequiresOuterTask](CodeGenFunction &CGF,
9308                                        PrePostActionTy &) {
9309     if (RequiresOuterTask) {
9310       CapturedVars.clear();
9311       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9312     }
9313     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
9314   };
9315 
9316   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
9317                           &CapturedVars, RequiresOuterTask,
9318                           &CS](CodeGenFunction &CGF, PrePostActionTy &) {
9319     // Fill up the arrays with all the captured variables.
9320     MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
9321     MappableExprsHandler::MapValuesArrayTy Pointers;
9322     MappableExprsHandler::MapValuesArrayTy Sizes;
9323     MappableExprsHandler::MapFlagsArrayTy MapTypes;
9324 
9325     // Get mappable expression information.
9326     MappableExprsHandler MEHandler(D, CGF);
9327     llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
9328 
9329     auto RI = CS.getCapturedRecordDecl()->field_begin();
9330     auto CV = CapturedVars.begin();
9331     for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
9332                                               CE = CS.capture_end();
9333          CI != CE; ++CI, ++RI, ++CV) {
9334       MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
9335       MappableExprsHandler::MapValuesArrayTy CurPointers;
9336       MappableExprsHandler::MapValuesArrayTy CurSizes;
9337       MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
9338       MappableExprsHandler::StructRangeInfoTy PartialStruct;
9339 
9340       // VLA sizes are passed to the outlined region by copy and do not have map
9341       // information associated.
9342       if (CI->capturesVariableArrayType()) {
9343         CurBasePointers.push_back(*CV);
9344         CurPointers.push_back(*CV);
9345         CurSizes.push_back(CGF.Builder.CreateIntCast(
9346             CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
9347         // Copy to the device as an argument. No need to retrieve it.
9348         CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
9349                               MappableExprsHandler::OMP_MAP_TARGET_PARAM |
9350                               MappableExprsHandler::OMP_MAP_IMPLICIT);
9351       } else {
9352         // If we have any information in the map clause, we use it, otherwise we
9353         // just do a default mapping.
9354         MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
9355                                          CurSizes, CurMapTypes, PartialStruct);
9356         if (CurBasePointers.empty())
9357           MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
9358                                            CurPointers, CurSizes, CurMapTypes);
9359         // Generate correct mapping for variables captured by reference in
9360         // lambdas.
9361         if (CI->capturesVariable())
9362           MEHandler.generateInfoForLambdaCaptures(
9363               CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
9364               CurMapTypes, LambdaPointers);
9365       }
9366       // We expect to have at least an element of information for this capture.
9367       assert(!CurBasePointers.empty() &&
9368              "Non-existing map pointer for capture!");
9369       assert(CurBasePointers.size() == CurPointers.size() &&
9370              CurBasePointers.size() == CurSizes.size() &&
9371              CurBasePointers.size() == CurMapTypes.size() &&
9372              "Inconsistent map information sizes!");
9373 
9374       // If there is an entry in PartialStruct it means we have a struct with
9375       // individual members mapped. Emit an extra combined entry.
9376       if (PartialStruct.Base.isValid())
9377         MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
9378                                     CurMapTypes, PartialStruct);
9379 
9380       // We need to append the results of this capture to what we already have.
9381       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
9382       Pointers.append(CurPointers.begin(), CurPointers.end());
9383       Sizes.append(CurSizes.begin(), CurSizes.end());
9384       MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
9385     }
9386     // Adjust MEMBER_OF flags for the lambdas captures.
9387     MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
9388                                               Pointers, MapTypes);
9389     // Map other list items in the map clause which are not captured variables
9390     // but "declare target link" global variables.
9391     MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
9392                                                MapTypes);
9393 
9394     TargetDataInfo Info;
9395     // Fill up the arrays and create the arguments.
9396     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
9397     emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
9398                                  Info.PointersArray, Info.SizesArray,
9399                                  Info.MapTypesArray, Info);
9400     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
9401     InputInfo.BasePointersArray =
9402         Address(Info.BasePointersArray, CGM.getPointerAlign());
9403     InputInfo.PointersArray =
9404         Address(Info.PointersArray, CGM.getPointerAlign());
9405     InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
9406     MapTypesArray = Info.MapTypesArray;
9407     if (RequiresOuterTask)
9408       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
9409     else
9410       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
9411   };
9412 
9413   auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
9414                              CodeGenFunction &CGF, PrePostActionTy &) {
9415     if (RequiresOuterTask) {
9416       CodeGenFunction::OMPTargetDataInfo InputInfo;
9417       CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
9418     } else {
9419       emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
9420     }
9421   };
9422 
9423   // If we have a target function ID it means that we need to support
9424   // offloading, otherwise, just execute on the host. We need to execute on host
9425   // regardless of the conditional in the if clause if, e.g., the user do not
9426   // specify target triples.
9427   if (OutlinedFnID) {
9428     if (IfCond) {
9429       emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
9430     } else {
9431       RegionCodeGenTy ThenRCG(TargetThenGen);
9432       ThenRCG(CGF);
9433     }
9434   } else {
9435     RegionCodeGenTy ElseRCG(TargetElseGen);
9436     ElseRCG(CGF);
9437   }
9438 }
9439 
9440 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
9441                                                     StringRef ParentName) {
9442   if (!S)
9443     return;
9444 
9445   // Codegen OMP target directives that offload compute to the device.
9446   bool RequiresDeviceCodegen =
9447       isa<OMPExecutableDirective>(S) &&
9448       isOpenMPTargetExecutionDirective(
9449           cast<OMPExecutableDirective>(S)->getDirectiveKind());
9450 
9451   if (RequiresDeviceCodegen) {
9452     const auto &E = *cast<OMPExecutableDirective>(S);
9453     unsigned DeviceID;
9454     unsigned FileID;
9455     unsigned Line;
9456     getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
9457                              FileID, Line);
9458 
9459     // Is this a target region that should not be emitted as an entry point? If
9460     // so just signal we are done with this target region.
9461     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
9462                                                             ParentName, Line))
9463       return;
9464 
9465     switch (E.getDirectiveKind()) {
9466     case OMPD_target:
9467       CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
9468                                                    cast<OMPTargetDirective>(E));
9469       break;
9470     case OMPD_target_parallel:
9471       CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
9472           CGM, ParentName, cast<OMPTargetParallelDirective>(E));
9473       break;
9474     case OMPD_target_teams:
9475       CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
9476           CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
9477       break;
9478     case OMPD_target_teams_distribute:
9479       CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
9480           CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
9481       break;
9482     case OMPD_target_teams_distribute_simd:
9483       CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
9484           CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
9485       break;
9486     case OMPD_target_parallel_for:
9487       CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
9488           CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
9489       break;
9490     case OMPD_target_parallel_for_simd:
9491       CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
9492           CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
9493       break;
9494     case OMPD_target_simd:
9495       CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
9496           CGM, ParentName, cast<OMPTargetSimdDirective>(E));
9497       break;
9498     case OMPD_target_teams_distribute_parallel_for:
9499       CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
9500           CGM, ParentName,
9501           cast<OMPTargetTeamsDistributeParallelForDirective>(E));
9502       break;
9503     case OMPD_target_teams_distribute_parallel_for_simd:
9504       CodeGenFunction::
9505           EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
9506               CGM, ParentName,
9507               cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
9508       break;
9509     case OMPD_parallel:
9510     case OMPD_for:
9511     case OMPD_parallel_for:
9512     case OMPD_parallel_master:
9513     case OMPD_parallel_sections:
9514     case OMPD_for_simd:
9515     case OMPD_parallel_for_simd:
9516     case OMPD_cancel:
9517     case OMPD_cancellation_point:
9518     case OMPD_ordered:
9519     case OMPD_threadprivate:
9520     case OMPD_allocate:
9521     case OMPD_task:
9522     case OMPD_simd:
9523     case OMPD_sections:
9524     case OMPD_section:
9525     case OMPD_single:
9526     case OMPD_master:
9527     case OMPD_critical:
9528     case OMPD_taskyield:
9529     case OMPD_barrier:
9530     case OMPD_taskwait:
9531     case OMPD_taskgroup:
9532     case OMPD_atomic:
9533     case OMPD_flush:
9534     case OMPD_teams:
9535     case OMPD_target_data:
9536     case OMPD_target_exit_data:
9537     case OMPD_target_enter_data:
9538     case OMPD_distribute:
9539     case OMPD_distribute_simd:
9540     case OMPD_distribute_parallel_for:
9541     case OMPD_distribute_parallel_for_simd:
9542     case OMPD_teams_distribute:
9543     case OMPD_teams_distribute_simd:
9544     case OMPD_teams_distribute_parallel_for:
9545     case OMPD_teams_distribute_parallel_for_simd:
9546     case OMPD_target_update:
9547     case OMPD_declare_simd:
9548     case OMPD_declare_variant:
9549     case OMPD_declare_target:
9550     case OMPD_end_declare_target:
9551     case OMPD_declare_reduction:
9552     case OMPD_declare_mapper:
9553     case OMPD_taskloop:
9554     case OMPD_taskloop_simd:
9555     case OMPD_master_taskloop:
9556     case OMPD_master_taskloop_simd:
9557     case OMPD_parallel_master_taskloop:
9558     case OMPD_parallel_master_taskloop_simd:
9559     case OMPD_requires:
9560     case OMPD_unknown:
9561       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
9562     }
9563     return;
9564   }
9565 
9566   if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
9567     if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
9568       return;
9569 
9570     scanForTargetRegionsFunctions(
9571         E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
9572     return;
9573   }
9574 
9575   // If this is a lambda function, look into its body.
9576   if (const auto *L = dyn_cast<LambdaExpr>(S))
9577     S = L->getBody();
9578 
9579   // Keep looking for target regions recursively.
9580   for (const Stmt *II : S->children())
9581     scanForTargetRegionsFunctions(II, ParentName);
9582 }
9583 
9584 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9585   // If emitting code for the host, we do not process FD here. Instead we do
9586   // the normal code generation.
9587   if (!CGM.getLangOpts().OpenMPIsDevice) {
9588     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
9589       Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9590           OMPDeclareTargetDeclAttr::getDeviceType(FD);
9591       // Do not emit device_type(nohost) functions for the host.
9592       if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9593         return true;
9594     }
9595     return false;
9596   }
9597 
9598   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9599   // Try to detect target regions in the function.
9600   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
9601     StringRef Name = CGM.getMangledName(GD);
9602     scanForTargetRegionsFunctions(FD->getBody(), Name);
9603     Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9604         OMPDeclareTargetDeclAttr::getDeviceType(FD);
9605     // Do not emit device_type(nohost) functions for the host.
9606     if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9607       return true;
9608   }
9609 
9610   // Do not to emit function if it is not marked as declare target.
9611   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9612          AlreadyEmittedTargetDecls.count(VD) == 0;
9613 }
9614 
9615 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9616   if (!CGM.getLangOpts().OpenMPIsDevice)
9617     return false;
9618 
9619   // Check if there are Ctors/Dtors in this declaration and look for target
9620   // regions in it. We use the complete variant to produce the kernel name
9621   // mangling.
9622   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9623   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9624     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9625       StringRef ParentName =
9626           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9627       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9628     }
9629     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9630       StringRef ParentName =
9631           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9632       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9633     }
9634   }
9635 
9636   // Do not to emit variable if it is not marked as declare target.
9637   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9638       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9639           cast<VarDecl>(GD.getDecl()));
9640   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9641       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9642        HasRequiresUnifiedSharedMemory)) {
9643     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9644     return true;
9645   }
9646   return false;
9647 }
9648 
9649 llvm::Constant *
9650 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
9651                                                 const VarDecl *VD) {
9652   assert(VD->getType().isConstant(CGM.getContext()) &&
9653          "Expected constant variable.");
9654   StringRef VarName;
9655   llvm::Constant *Addr;
9656   llvm::GlobalValue::LinkageTypes Linkage;
9657   QualType Ty = VD->getType();
9658   SmallString<128> Buffer;
9659   {
9660     unsigned DeviceID;
9661     unsigned FileID;
9662     unsigned Line;
9663     getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
9664                              FileID, Line);
9665     llvm::raw_svector_ostream OS(Buffer);
9666     OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
9667        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
9668     VarName = OS.str();
9669   }
9670   Linkage = llvm::GlobalValue::InternalLinkage;
9671   Addr =
9672       getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
9673                                   getDefaultFirstprivateAddressSpace());
9674   cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
9675   CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
9676   CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
9677   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9678       VarName, Addr, VarSize,
9679       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
9680   return Addr;
9681 }
9682 
9683 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
9684                                                    llvm::Constant *Addr) {
9685   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
9686       !CGM.getLangOpts().OpenMPIsDevice)
9687     return;
9688   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9689       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9690   if (!Res) {
9691     if (CGM.getLangOpts().OpenMPIsDevice) {
9692       // Register non-target variables being emitted in device code (debug info
9693       // may cause this).
9694       StringRef VarName = CGM.getMangledName(VD);
9695       EmittedNonTargetVariables.try_emplace(VarName, Addr);
9696     }
9697     return;
9698   }
9699   // Register declare target variables.
9700   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
9701   StringRef VarName;
9702   CharUnits VarSize;
9703   llvm::GlobalValue::LinkageTypes Linkage;
9704 
9705   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9706       !HasRequiresUnifiedSharedMemory) {
9707     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9708     VarName = CGM.getMangledName(VD);
9709     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
9710       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
9711       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
9712     } else {
9713       VarSize = CharUnits::Zero();
9714     }
9715     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
9716     // Temp solution to prevent optimizations of the internal variables.
9717     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
9718       std::string RefName = getName({VarName, "ref"});
9719       if (!CGM.GetGlobalValue(RefName)) {
9720         llvm::Constant *AddrRef =
9721             getOrCreateInternalVariable(Addr->getType(), RefName);
9722         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
9723         GVAddrRef->setConstant(/*Val=*/true);
9724         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
9725         GVAddrRef->setInitializer(Addr);
9726         CGM.addCompilerUsedGlobal(GVAddrRef);
9727       }
9728     }
9729   } else {
9730     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
9731             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9732              HasRequiresUnifiedSharedMemory)) &&
9733            "Declare target attribute must link or to with unified memory.");
9734     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
9735       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
9736     else
9737       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9738 
9739     if (CGM.getLangOpts().OpenMPIsDevice) {
9740       VarName = Addr->getName();
9741       Addr = nullptr;
9742     } else {
9743       VarName = getAddrOfDeclareTargetVar(VD).getName();
9744       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
9745     }
9746     VarSize = CGM.getPointerSize();
9747     Linkage = llvm::GlobalValue::WeakAnyLinkage;
9748   }
9749 
9750   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9751       VarName, Addr, VarSize, Flags, Linkage);
9752 }
9753 
9754 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
9755   if (isa<FunctionDecl>(GD.getDecl()) ||
9756       isa<OMPDeclareReductionDecl>(GD.getDecl()))
9757     return emitTargetFunctions(GD);
9758 
9759   return emitTargetGlobalVariable(GD);
9760 }
9761 
9762 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
9763   for (const VarDecl *VD : DeferredGlobalVariables) {
9764     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9765         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9766     if (!Res)
9767       continue;
9768     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9769         !HasRequiresUnifiedSharedMemory) {
9770       CGM.EmitGlobal(VD);
9771     } else {
9772       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
9773               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9774                HasRequiresUnifiedSharedMemory)) &&
9775              "Expected link clause or to clause with unified memory.");
9776       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
9777     }
9778   }
9779 }
9780 
9781 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
9782     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
9783   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
9784          " Expected target-based directive.");
9785 }
9786 
9787 void CGOpenMPRuntime::checkArchForUnifiedAddressing(
9788     const OMPRequiresDecl *D) {
9789   for (const OMPClause *Clause : D->clauselists()) {
9790     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
9791       HasRequiresUnifiedSharedMemory = true;
9792       break;
9793     }
9794   }
9795 }
9796 
9797 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
9798                                                        LangAS &AS) {
9799   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
9800     return false;
9801   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
9802   switch(A->getAllocatorType()) {
9803   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
9804   // Not supported, fallback to the default mem space.
9805   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
9806   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
9807   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
9808   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
9809   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
9810   case OMPAllocateDeclAttr::OMPConstMemAlloc:
9811   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
9812     AS = LangAS::Default;
9813     return true;
9814   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
9815     llvm_unreachable("Expected predefined allocator for the variables with the "
9816                      "static storage.");
9817   }
9818   return false;
9819 }
9820 
9821 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
9822   return HasRequiresUnifiedSharedMemory;
9823 }
9824 
9825 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
9826     CodeGenModule &CGM)
9827     : CGM(CGM) {
9828   if (CGM.getLangOpts().OpenMPIsDevice) {
9829     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
9830     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
9831   }
9832 }
9833 
9834 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
9835   if (CGM.getLangOpts().OpenMPIsDevice)
9836     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
9837 }
9838 
9839 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
9840   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
9841     return true;
9842 
9843   const auto *D = cast<FunctionDecl>(GD.getDecl());
9844   // Do not to emit function if it is marked as declare target as it was already
9845   // emitted.
9846   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
9847     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
9848       if (auto *F = dyn_cast_or_null<llvm::Function>(
9849               CGM.GetGlobalValue(CGM.getMangledName(GD))))
9850         return !F->isDeclaration();
9851       return false;
9852     }
9853     return true;
9854   }
9855 
9856   return !AlreadyEmittedTargetDecls.insert(D).second;
9857 }
9858 
9859 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
9860   // If we don't have entries or if we are emitting code for the device, we
9861   // don't need to do anything.
9862   if (CGM.getLangOpts().OMPTargetTriples.empty() ||
9863       CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
9864       (OffloadEntriesInfoManager.empty() &&
9865        !HasEmittedDeclareTargetRegion &&
9866        !HasEmittedTargetRegion))
9867     return nullptr;
9868 
9869   // Create and register the function that handles the requires directives.
9870   ASTContext &C = CGM.getContext();
9871 
9872   llvm::Function *RequiresRegFn;
9873   {
9874     CodeGenFunction CGF(CGM);
9875     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
9876     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
9877     std::string ReqName = getName({"omp_offloading", "requires_reg"});
9878     RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI);
9879     CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
9880     OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
9881     // TODO: check for other requires clauses.
9882     // The requires directive takes effect only when a target region is
9883     // present in the compilation unit. Otherwise it is ignored and not
9884     // passed to the runtime. This avoids the runtime from throwing an error
9885     // for mismatching requires clauses across compilation units that don't
9886     // contain at least 1 target region.
9887     assert((HasEmittedTargetRegion ||
9888             HasEmittedDeclareTargetRegion ||
9889             !OffloadEntriesInfoManager.empty()) &&
9890            "Target or declare target region expected.");
9891     if (HasRequiresUnifiedSharedMemory)
9892       Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
9893     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires),
9894         llvm::ConstantInt::get(CGM.Int64Ty, Flags));
9895     CGF.FinishFunction();
9896   }
9897   return RequiresRegFn;
9898 }
9899 
9900 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
9901                                     const OMPExecutableDirective &D,
9902                                     SourceLocation Loc,
9903                                     llvm::Function *OutlinedFn,
9904                                     ArrayRef<llvm::Value *> CapturedVars) {
9905   if (!CGF.HaveInsertPoint())
9906     return;
9907 
9908   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9909   CodeGenFunction::RunCleanupsScope Scope(CGF);
9910 
9911   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
9912   llvm::Value *Args[] = {
9913       RTLoc,
9914       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
9915       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
9916   llvm::SmallVector<llvm::Value *, 16> RealArgs;
9917   RealArgs.append(std::begin(Args), std::end(Args));
9918   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
9919 
9920   llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
9921   CGF.EmitRuntimeCall(RTLFn, RealArgs);
9922 }
9923 
9924 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
9925                                          const Expr *NumTeams,
9926                                          const Expr *ThreadLimit,
9927                                          SourceLocation Loc) {
9928   if (!CGF.HaveInsertPoint())
9929     return;
9930 
9931   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9932 
9933   llvm::Value *NumTeamsVal =
9934       NumTeams
9935           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
9936                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
9937           : CGF.Builder.getInt32(0);
9938 
9939   llvm::Value *ThreadLimitVal =
9940       ThreadLimit
9941           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
9942                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
9943           : CGF.Builder.getInt32(0);
9944 
9945   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
9946   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
9947                                      ThreadLimitVal};
9948   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
9949                       PushNumTeamsArgs);
9950 }
9951 
9952 void CGOpenMPRuntime::emitTargetDataCalls(
9953     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
9954     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
9955   if (!CGF.HaveInsertPoint())
9956     return;
9957 
9958   // Action used to replace the default codegen action and turn privatization
9959   // off.
9960   PrePostActionTy NoPrivAction;
9961 
9962   // Generate the code for the opening of the data environment. Capture all the
9963   // arguments of the runtime call by reference because they are used in the
9964   // closing of the region.
9965   auto &&BeginThenGen = [this, &D, Device, &Info,
9966                          &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
9967     // Fill up the arrays with all the mapped variables.
9968     MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
9969     MappableExprsHandler::MapValuesArrayTy Pointers;
9970     MappableExprsHandler::MapValuesArrayTy Sizes;
9971     MappableExprsHandler::MapFlagsArrayTy MapTypes;
9972 
9973     // Get map clause information.
9974     MappableExprsHandler MCHandler(D, CGF);
9975     MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
9976 
9977     // Fill up the arrays and create the arguments.
9978     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
9979 
9980     llvm::Value *BasePointersArrayArg = nullptr;
9981     llvm::Value *PointersArrayArg = nullptr;
9982     llvm::Value *SizesArrayArg = nullptr;
9983     llvm::Value *MapTypesArrayArg = nullptr;
9984     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
9985                                  SizesArrayArg, MapTypesArrayArg, Info);
9986 
9987     // Emit device ID if any.
9988     llvm::Value *DeviceID = nullptr;
9989     if (Device) {
9990       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
9991                                            CGF.Int64Ty, /*isSigned=*/true);
9992     } else {
9993       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9994     }
9995 
9996     // Emit the number of elements in the offloading arrays.
9997     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
9998 
9999     llvm::Value *OffloadingArgs[] = {
10000         DeviceID,         PointerNum,    BasePointersArrayArg,
10001         PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
10002     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin),
10003                         OffloadingArgs);
10004 
10005     // If device pointer privatization is required, emit the body of the region
10006     // here. It will have to be duplicated: with and without privatization.
10007     if (!Info.CaptureDeviceAddrMap.empty())
10008       CodeGen(CGF);
10009   };
10010 
10011   // Generate code for the closing of the data region.
10012   auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
10013                                             PrePostActionTy &) {
10014     assert(Info.isValid() && "Invalid data environment closing arguments.");
10015 
10016     llvm::Value *BasePointersArrayArg = nullptr;
10017     llvm::Value *PointersArrayArg = nullptr;
10018     llvm::Value *SizesArrayArg = nullptr;
10019     llvm::Value *MapTypesArrayArg = nullptr;
10020     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
10021                                  SizesArrayArg, MapTypesArrayArg, Info);
10022 
10023     // Emit device ID if any.
10024     llvm::Value *DeviceID = nullptr;
10025     if (Device) {
10026       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10027                                            CGF.Int64Ty, /*isSigned=*/true);
10028     } else {
10029       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10030     }
10031 
10032     // Emit the number of elements in the offloading arrays.
10033     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
10034 
10035     llvm::Value *OffloadingArgs[] = {
10036         DeviceID,         PointerNum,    BasePointersArrayArg,
10037         PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
10038     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end),
10039                         OffloadingArgs);
10040   };
10041 
10042   // If we need device pointer privatization, we need to emit the body of the
10043   // region with no privatization in the 'else' branch of the conditional.
10044   // Otherwise, we don't have to do anything.
10045   auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
10046                                                          PrePostActionTy &) {
10047     if (!Info.CaptureDeviceAddrMap.empty()) {
10048       CodeGen.setAction(NoPrivAction);
10049       CodeGen(CGF);
10050     }
10051   };
10052 
10053   // We don't have to do anything to close the region if the if clause evaluates
10054   // to false.
10055   auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
10056 
10057   if (IfCond) {
10058     emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
10059   } else {
10060     RegionCodeGenTy RCG(BeginThenGen);
10061     RCG(CGF);
10062   }
10063 
10064   // If we don't require privatization of device pointers, we emit the body in
10065   // between the runtime calls. This avoids duplicating the body code.
10066   if (Info.CaptureDeviceAddrMap.empty()) {
10067     CodeGen.setAction(NoPrivAction);
10068     CodeGen(CGF);
10069   }
10070 
10071   if (IfCond) {
10072     emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
10073   } else {
10074     RegionCodeGenTy RCG(EndThenGen);
10075     RCG(CGF);
10076   }
10077 }
10078 
10079 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
10080     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10081     const Expr *Device) {
10082   if (!CGF.HaveInsertPoint())
10083     return;
10084 
10085   assert((isa<OMPTargetEnterDataDirective>(D) ||
10086           isa<OMPTargetExitDataDirective>(D) ||
10087           isa<OMPTargetUpdateDirective>(D)) &&
10088          "Expecting either target enter, exit data, or update directives.");
10089 
10090   CodeGenFunction::OMPTargetDataInfo InputInfo;
10091   llvm::Value *MapTypesArray = nullptr;
10092   // Generate the code for the opening of the data environment.
10093   auto &&ThenGen = [this, &D, Device, &InputInfo,
10094                     &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
10095     // Emit device ID if any.
10096     llvm::Value *DeviceID = nullptr;
10097     if (Device) {
10098       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10099                                            CGF.Int64Ty, /*isSigned=*/true);
10100     } else {
10101       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10102     }
10103 
10104     // Emit the number of elements in the offloading arrays.
10105     llvm::Constant *PointerNum =
10106         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10107 
10108     llvm::Value *OffloadingArgs[] = {DeviceID,
10109                                      PointerNum,
10110                                      InputInfo.BasePointersArray.getPointer(),
10111                                      InputInfo.PointersArray.getPointer(),
10112                                      InputInfo.SizesArray.getPointer(),
10113                                      MapTypesArray};
10114 
10115     // Select the right runtime function call for each expected standalone
10116     // directive.
10117     const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10118     OpenMPRTLFunction RTLFn;
10119     switch (D.getDirectiveKind()) {
10120     case OMPD_target_enter_data:
10121       RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
10122                         : OMPRTL__tgt_target_data_begin;
10123       break;
10124     case OMPD_target_exit_data:
10125       RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
10126                         : OMPRTL__tgt_target_data_end;
10127       break;
10128     case OMPD_target_update:
10129       RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
10130                         : OMPRTL__tgt_target_data_update;
10131       break;
10132     case OMPD_parallel:
10133     case OMPD_for:
10134     case OMPD_parallel_for:
10135     case OMPD_parallel_master:
10136     case OMPD_parallel_sections:
10137     case OMPD_for_simd:
10138     case OMPD_parallel_for_simd:
10139     case OMPD_cancel:
10140     case OMPD_cancellation_point:
10141     case OMPD_ordered:
10142     case OMPD_threadprivate:
10143     case OMPD_allocate:
10144     case OMPD_task:
10145     case OMPD_simd:
10146     case OMPD_sections:
10147     case OMPD_section:
10148     case OMPD_single:
10149     case OMPD_master:
10150     case OMPD_critical:
10151     case OMPD_taskyield:
10152     case OMPD_barrier:
10153     case OMPD_taskwait:
10154     case OMPD_taskgroup:
10155     case OMPD_atomic:
10156     case OMPD_flush:
10157     case OMPD_teams:
10158     case OMPD_target_data:
10159     case OMPD_distribute:
10160     case OMPD_distribute_simd:
10161     case OMPD_distribute_parallel_for:
10162     case OMPD_distribute_parallel_for_simd:
10163     case OMPD_teams_distribute:
10164     case OMPD_teams_distribute_simd:
10165     case OMPD_teams_distribute_parallel_for:
10166     case OMPD_teams_distribute_parallel_for_simd:
10167     case OMPD_declare_simd:
10168     case OMPD_declare_variant:
10169     case OMPD_declare_target:
10170     case OMPD_end_declare_target:
10171     case OMPD_declare_reduction:
10172     case OMPD_declare_mapper:
10173     case OMPD_taskloop:
10174     case OMPD_taskloop_simd:
10175     case OMPD_master_taskloop:
10176     case OMPD_master_taskloop_simd:
10177     case OMPD_parallel_master_taskloop:
10178     case OMPD_parallel_master_taskloop_simd:
10179     case OMPD_target:
10180     case OMPD_target_simd:
10181     case OMPD_target_teams_distribute:
10182     case OMPD_target_teams_distribute_simd:
10183     case OMPD_target_teams_distribute_parallel_for:
10184     case OMPD_target_teams_distribute_parallel_for_simd:
10185     case OMPD_target_teams:
10186     case OMPD_target_parallel:
10187     case OMPD_target_parallel_for:
10188     case OMPD_target_parallel_for_simd:
10189     case OMPD_requires:
10190     case OMPD_unknown:
10191       llvm_unreachable("Unexpected standalone target data directive.");
10192       break;
10193     }
10194     CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
10195   };
10196 
10197   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
10198                              CodeGenFunction &CGF, PrePostActionTy &) {
10199     // Fill up the arrays with all the mapped variables.
10200     MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
10201     MappableExprsHandler::MapValuesArrayTy Pointers;
10202     MappableExprsHandler::MapValuesArrayTy Sizes;
10203     MappableExprsHandler::MapFlagsArrayTy MapTypes;
10204 
10205     // Get map clause information.
10206     MappableExprsHandler MEHandler(D, CGF);
10207     MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
10208 
10209     TargetDataInfo Info;
10210     // Fill up the arrays and create the arguments.
10211     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
10212     emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
10213                                  Info.PointersArray, Info.SizesArray,
10214                                  Info.MapTypesArray, Info);
10215     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10216     InputInfo.BasePointersArray =
10217         Address(Info.BasePointersArray, CGM.getPointerAlign());
10218     InputInfo.PointersArray =
10219         Address(Info.PointersArray, CGM.getPointerAlign());
10220     InputInfo.SizesArray =
10221         Address(Info.SizesArray, CGM.getPointerAlign());
10222     MapTypesArray = Info.MapTypesArray;
10223     if (D.hasClausesOfKind<OMPDependClause>())
10224       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10225     else
10226       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10227   };
10228 
10229   if (IfCond) {
10230     emitIfClause(CGF, IfCond, TargetThenGen,
10231                  [](CodeGenFunction &CGF, PrePostActionTy &) {});
10232   } else {
10233     RegionCodeGenTy ThenRCG(TargetThenGen);
10234     ThenRCG(CGF);
10235   }
10236 }
10237 
10238 namespace {
10239   /// Kind of parameter in a function with 'declare simd' directive.
10240   enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
10241   /// Attribute set of the parameter.
10242   struct ParamAttrTy {
10243     ParamKindTy Kind = Vector;
10244     llvm::APSInt StrideOrArg;
10245     llvm::APSInt Alignment;
10246   };
10247 } // namespace
10248 
10249 static unsigned evaluateCDTSize(const FunctionDecl *FD,
10250                                 ArrayRef<ParamAttrTy> ParamAttrs) {
10251   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10252   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10253   // of that clause. The VLEN value must be power of 2.
10254   // In other case the notion of the function`s "characteristic data type" (CDT)
10255   // is used to compute the vector length.
10256   // CDT is defined in the following order:
10257   //   a) For non-void function, the CDT is the return type.
10258   //   b) If the function has any non-uniform, non-linear parameters, then the
10259   //   CDT is the type of the first such parameter.
10260   //   c) If the CDT determined by a) or b) above is struct, union, or class
10261   //   type which is pass-by-value (except for the type that maps to the
10262   //   built-in complex data type), the characteristic data type is int.
10263   //   d) If none of the above three cases is applicable, the CDT is int.
10264   // The VLEN is then determined based on the CDT and the size of vector
10265   // register of that ISA for which current vector version is generated. The
10266   // VLEN is computed using the formula below:
10267   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
10268   // where vector register size specified in section 3.2.1 Registers and the
10269   // Stack Frame of original AMD64 ABI document.
10270   QualType RetType = FD->getReturnType();
10271   if (RetType.isNull())
10272     return 0;
10273   ASTContext &C = FD->getASTContext();
10274   QualType CDT;
10275   if (!RetType.isNull() && !RetType->isVoidType()) {
10276     CDT = RetType;
10277   } else {
10278     unsigned Offset = 0;
10279     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10280       if (ParamAttrs[Offset].Kind == Vector)
10281         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10282       ++Offset;
10283     }
10284     if (CDT.isNull()) {
10285       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10286         if (ParamAttrs[I + Offset].Kind == Vector) {
10287           CDT = FD->getParamDecl(I)->getType();
10288           break;
10289         }
10290       }
10291     }
10292   }
10293   if (CDT.isNull())
10294     CDT = C.IntTy;
10295   CDT = CDT->getCanonicalTypeUnqualified();
10296   if (CDT->isRecordType() || CDT->isUnionType())
10297     CDT = C.IntTy;
10298   return C.getTypeSize(CDT);
10299 }
10300 
10301 static void
10302 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10303                            const llvm::APSInt &VLENVal,
10304                            ArrayRef<ParamAttrTy> ParamAttrs,
10305                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
10306   struct ISADataTy {
10307     char ISA;
10308     unsigned VecRegSize;
10309   };
10310   ISADataTy ISAData[] = {
10311       {
10312           'b', 128
10313       }, // SSE
10314       {
10315           'c', 256
10316       }, // AVX
10317       {
10318           'd', 256
10319       }, // AVX2
10320       {
10321           'e', 512
10322       }, // AVX512
10323   };
10324   llvm::SmallVector<char, 2> Masked;
10325   switch (State) {
10326   case OMPDeclareSimdDeclAttr::BS_Undefined:
10327     Masked.push_back('N');
10328     Masked.push_back('M');
10329     break;
10330   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10331     Masked.push_back('N');
10332     break;
10333   case OMPDeclareSimdDeclAttr::BS_Inbranch:
10334     Masked.push_back('M');
10335     break;
10336   }
10337   for (char Mask : Masked) {
10338     for (const ISADataTy &Data : ISAData) {
10339       SmallString<256> Buffer;
10340       llvm::raw_svector_ostream Out(Buffer);
10341       Out << "_ZGV" << Data.ISA << Mask;
10342       if (!VLENVal) {
10343         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10344         assert(NumElts && "Non-zero simdlen/cdtsize expected");
10345         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10346       } else {
10347         Out << VLENVal;
10348       }
10349       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
10350         switch (ParamAttr.Kind){
10351         case LinearWithVarStride:
10352           Out << 's' << ParamAttr.StrideOrArg;
10353           break;
10354         case Linear:
10355           Out << 'l';
10356           if (!!ParamAttr.StrideOrArg)
10357             Out << ParamAttr.StrideOrArg;
10358           break;
10359         case Uniform:
10360           Out << 'u';
10361           break;
10362         case Vector:
10363           Out << 'v';
10364           break;
10365         }
10366         if (!!ParamAttr.Alignment)
10367           Out << 'a' << ParamAttr.Alignment;
10368       }
10369       Out << '_' << Fn->getName();
10370       Fn->addFnAttr(Out.str());
10371     }
10372   }
10373 }
10374 
// These are the functions needed to mangle the names of the vector
// functions generated by the compiler, according to the rules defined in
// the "Vector Function ABI specifications for AArch64", available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10380 
10381 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
10382 ///
10383 /// TODO: Need to implement the behavior for reference marked with a
10384 /// var or no linear modifiers (1.b in the section). For this, we
10385 /// need to extend ParamKindTy to support the linear modifiers.
10386 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10387   QT = QT.getCanonicalType();
10388 
10389   if (QT->isVoidType())
10390     return false;
10391 
10392   if (Kind == ParamKindTy::Uniform)
10393     return false;
10394 
10395   if (Kind == ParamKindTy::Linear)
10396     return false;
10397 
10398   // TODO: Handle linear references with modifiers
10399 
10400   if (Kind == ParamKindTy::LinearWithVarStride)
10401     return false;
10402 
10403   return true;
10404 }
10405 
10406 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10407 static bool getAArch64PBV(QualType QT, ASTContext &C) {
10408   QT = QT.getCanonicalType();
10409   unsigned Size = C.getTypeSize(QT);
10410 
10411   // Only scalars and complex within 16 bytes wide set PVB to true.
10412   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10413     return false;
10414 
10415   if (QT->isFloatingType())
10416     return true;
10417 
10418   if (QT->isIntegerType())
10419     return true;
10420 
10421   if (QT->isPointerType())
10422     return true;
10423 
10424   // TODO: Add support for complex types (section 3.1.2, item 2).
10425 
10426   return false;
10427 }
10428 
10429 /// Computes the lane size (LS) of a return type or of an input parameter,
10430 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10431 /// TODO: Add support for references, section 3.2.1, item 1.
10432 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10433   if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10434     QualType PTy = QT.getCanonicalType()->getPointeeType();
10435     if (getAArch64PBV(PTy, C))
10436       return C.getTypeSize(PTy);
10437   }
10438   if (getAArch64PBV(QT, C))
10439     return C.getTypeSize(QT);
10440 
10441   return C.getTypeSize(C.getUIntPtrType());
10442 }
10443 
10444 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10445 // signature of the scalar function, as defined in 3.2.2 of the
10446 // AAVFABI.
10447 static std::tuple<unsigned, unsigned, bool>
10448 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10449   QualType RetType = FD->getReturnType().getCanonicalType();
10450 
10451   ASTContext &C = FD->getASTContext();
10452 
10453   bool OutputBecomesInput = false;
10454 
10455   llvm::SmallVector<unsigned, 8> Sizes;
10456   if (!RetType->isVoidType()) {
10457     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10458     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10459       OutputBecomesInput = true;
10460   }
10461   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10462     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10463     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10464   }
10465 
10466   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10467   // The LS of a function parameter / return value can only be a power
10468   // of 2, starting from 8 bits, up to 128.
10469   assert(std::all_of(Sizes.begin(), Sizes.end(),
10470                      [](unsigned Size) {
10471                        return Size == 8 || Size == 16 || Size == 32 ||
10472                               Size == 64 || Size == 128;
10473                      }) &&
10474          "Invalid size");
10475 
10476   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10477                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
10478                          OutputBecomesInput);
10479 }
10480 
10481 /// Mangle the parameter part of the vector function name according to
10482 /// their OpenMP classification. The mangling function is defined in
10483 /// section 3.5 of the AAVFABI.
10484 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10485   SmallString<256> Buffer;
10486   llvm::raw_svector_ostream Out(Buffer);
10487   for (const auto &ParamAttr : ParamAttrs) {
10488     switch (ParamAttr.Kind) {
10489     case LinearWithVarStride:
10490       Out << "ls" << ParamAttr.StrideOrArg;
10491       break;
10492     case Linear:
10493       Out << 'l';
10494       // Don't print the step value if it is not present or if it is
10495       // equal to 1.
10496       if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1)
10497         Out << ParamAttr.StrideOrArg;
10498       break;
10499     case Uniform:
10500       Out << 'u';
10501       break;
10502     case Vector:
10503       Out << 'v';
10504       break;
10505     }
10506 
10507     if (!!ParamAttr.Alignment)
10508       Out << 'a' << ParamAttr.Alignment;
10509   }
10510 
10511   return std::string(Out.str());
10512 }
10513 
10514 // Function used to add the attribute. The parameter `VLEN` is
10515 // templated to allow the use of "x" when targeting scalable functions
10516 // for SVE.
10517 template <typename T>
10518 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10519                                  char ISA, StringRef ParSeq,
10520                                  StringRef MangledName, bool OutputBecomesInput,
10521                                  llvm::Function *Fn) {
10522   SmallString<256> Buffer;
10523   llvm::raw_svector_ostream Out(Buffer);
10524   Out << Prefix << ISA << LMask << VLEN;
10525   if (OutputBecomesInput)
10526     Out << "v";
10527   Out << ParSeq << "_" << MangledName;
10528   Fn->addFnAttr(Out.str());
10529 }
10530 
10531 // Helper function to generate the Advanced SIMD names depending on
10532 // the value of the NDS when simdlen is not present.
10533 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10534                                       StringRef Prefix, char ISA,
10535                                       StringRef ParSeq, StringRef MangledName,
10536                                       bool OutputBecomesInput,
10537                                       llvm::Function *Fn) {
10538   switch (NDS) {
10539   case 8:
10540     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10541                          OutputBecomesInput, Fn);
10542     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10543                          OutputBecomesInput, Fn);
10544     break;
10545   case 16:
10546     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10547                          OutputBecomesInput, Fn);
10548     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10549                          OutputBecomesInput, Fn);
10550     break;
10551   case 32:
10552     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10553                          OutputBecomesInput, Fn);
10554     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10555                          OutputBecomesInput, Fn);
10556     break;
10557   case 64:
10558   case 128:
10559     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10560                          OutputBecomesInput, Fn);
10561     break;
10562   default:
10563     llvm_unreachable("Scalar type is too wide.");
10564   }
10565 }
10566 
10567 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
10568 static void emitAArch64DeclareSimdFunction(
10569     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
10570     ArrayRef<ParamAttrTy> ParamAttrs,
10571     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
10572     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
10573 
10574   // Get basic data for building the vector signature.
10575   const auto Data = getNDSWDS(FD, ParamAttrs);
10576   const unsigned NDS = std::get<0>(Data);
10577   const unsigned WDS = std::get<1>(Data);
10578   const bool OutputBecomesInput = std::get<2>(Data);
10579 
10580   // Check the values provided via `simdlen` by the user.
10581   // 1. A `simdlen(1)` doesn't produce vector signatures,
10582   if (UserVLEN == 1) {
10583     unsigned DiagID = CGM.getDiags().getCustomDiagID(
10584         DiagnosticsEngine::Warning,
10585         "The clause simdlen(1) has no effect when targeting aarch64.");
10586     CGM.getDiags().Report(SLoc, DiagID);
10587     return;
10588   }
10589 
10590   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
10591   // Advanced SIMD output.
10592   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
10593     unsigned DiagID = CGM.getDiags().getCustomDiagID(
10594         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
10595                                     "power of 2 when targeting Advanced SIMD.");
10596     CGM.getDiags().Report(SLoc, DiagID);
10597     return;
10598   }
10599 
10600   // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
10601   // limits.
10602   if (ISA == 's' && UserVLEN != 0) {
10603     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
10604       unsigned DiagID = CGM.getDiags().getCustomDiagID(
10605           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
10606                                       "lanes in the architectural constraints "
10607                                       "for SVE (min is 128-bit, max is "
10608                                       "2048-bit, by steps of 128-bit)");
10609       CGM.getDiags().Report(SLoc, DiagID) << WDS;
10610       return;
10611     }
10612   }
10613 
10614   // Sort out parameter sequence.
10615   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
10616   StringRef Prefix = "_ZGV";
10617   // Generate simdlen from user input (if any).
10618   if (UserVLEN) {
10619     if (ISA == 's') {
10620       // SVE generates only a masked function.
10621       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10622                            OutputBecomesInput, Fn);
10623     } else {
10624       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10625       // Advanced SIMD generates one or two functions, depending on
10626       // the `[not]inbranch` clause.
10627       switch (State) {
10628       case OMPDeclareSimdDeclAttr::BS_Undefined:
10629         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10630                              OutputBecomesInput, Fn);
10631         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10632                              OutputBecomesInput, Fn);
10633         break;
10634       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10635         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10636                              OutputBecomesInput, Fn);
10637         break;
10638       case OMPDeclareSimdDeclAttr::BS_Inbranch:
10639         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10640                              OutputBecomesInput, Fn);
10641         break;
10642       }
10643     }
10644   } else {
10645     // If no user simdlen is provided, follow the AAVFABI rules for
10646     // generating the vector length.
10647     if (ISA == 's') {
10648       // SVE, section 3.4.1, item 1.
10649       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
10650                            OutputBecomesInput, Fn);
10651     } else {
10652       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10653       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
10654       // two vector names depending on the use of the clause
10655       // `[not]inbranch`.
10656       switch (State) {
10657       case OMPDeclareSimdDeclAttr::BS_Undefined:
10658         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10659                                   OutputBecomesInput, Fn);
10660         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10661                                   OutputBecomesInput, Fn);
10662         break;
10663       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10664         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10665                                   OutputBecomesInput, Fn);
10666         break;
10667       case OMPDeclareSimdDeclAttr::BS_Inbranch:
10668         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10669                                   OutputBecomesInput, Fn);
10670         break;
10671       }
10672     }
10673   }
10674 }
10675 
10676 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
10677                                               llvm::Function *Fn) {
10678   ASTContext &C = CGM.getContext();
10679   FD = FD->getMostRecentDecl();
10680   // Map params to their positions in function decl.
10681   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
10682   if (isa<CXXMethodDecl>(FD))
10683     ParamPositions.try_emplace(FD, 0);
10684   unsigned ParamPos = ParamPositions.size();
10685   for (const ParmVarDecl *P : FD->parameters()) {
10686     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
10687     ++ParamPos;
10688   }
10689   while (FD) {
10690     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
10691       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
10692       // Mark uniform parameters.
10693       for (const Expr *E : Attr->uniforms()) {
10694         E = E->IgnoreParenImpCasts();
10695         unsigned Pos;
10696         if (isa<CXXThisExpr>(E)) {
10697           Pos = ParamPositions[FD];
10698         } else {
10699           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10700                                 ->getCanonicalDecl();
10701           Pos = ParamPositions[PVD];
10702         }
10703         ParamAttrs[Pos].Kind = Uniform;
10704       }
10705       // Get alignment info.
10706       auto NI = Attr->alignments_begin();
10707       for (const Expr *E : Attr->aligneds()) {
10708         E = E->IgnoreParenImpCasts();
10709         unsigned Pos;
10710         QualType ParmTy;
10711         if (isa<CXXThisExpr>(E)) {
10712           Pos = ParamPositions[FD];
10713           ParmTy = E->getType();
10714         } else {
10715           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10716                                 ->getCanonicalDecl();
10717           Pos = ParamPositions[PVD];
10718           ParmTy = PVD->getType();
10719         }
10720         ParamAttrs[Pos].Alignment =
10721             (*NI)
10722                 ? (*NI)->EvaluateKnownConstInt(C)
10723                 : llvm::APSInt::getUnsigned(
10724                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
10725                           .getQuantity());
10726         ++NI;
10727       }
10728       // Mark linear parameters.
10729       auto SI = Attr->steps_begin();
10730       auto MI = Attr->modifiers_begin();
10731       for (const Expr *E : Attr->linears()) {
10732         E = E->IgnoreParenImpCasts();
10733         unsigned Pos;
10734         if (isa<CXXThisExpr>(E)) {
10735           Pos = ParamPositions[FD];
10736         } else {
10737           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10738                                 ->getCanonicalDecl();
10739           Pos = ParamPositions[PVD];
10740         }
10741         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
10742         ParamAttr.Kind = Linear;
10743         if (*SI) {
10744           Expr::EvalResult Result;
10745           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
10746             if (const auto *DRE =
10747                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
10748               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
10749                 ParamAttr.Kind = LinearWithVarStride;
10750                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
10751                     ParamPositions[StridePVD->getCanonicalDecl()]);
10752               }
10753             }
10754           } else {
10755             ParamAttr.StrideOrArg = Result.Val.getInt();
10756           }
10757         }
10758         ++SI;
10759         ++MI;
10760       }
10761       llvm::APSInt VLENVal;
10762       SourceLocation ExprLoc;
10763       const Expr *VLENExpr = Attr->getSimdlen();
10764       if (VLENExpr) {
10765         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
10766         ExprLoc = VLENExpr->getExprLoc();
10767       }
10768       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
10769       if (CGM.getTriple().isX86()) {
10770         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
10771       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
10772         unsigned VLEN = VLENVal.getExtValue();
10773         StringRef MangledName = Fn->getName();
10774         if (CGM.getTarget().hasFeature("sve"))
10775           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10776                                          MangledName, 's', 128, Fn, ExprLoc);
10777         if (CGM.getTarget().hasFeature("neon"))
10778           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10779                                          MangledName, 'n', 128, Fn, ExprLoc);
10780       }
10781     }
10782     FD = FD->getPreviousDecl();
10783   }
10784 }
10785 
10786 namespace {
10787 /// Cleanup action for doacross support.
10788 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
10789 public:
10790   static const int DoacrossFinArgs = 2;
10791 
10792 private:
10793   llvm::FunctionCallee RTLFn;
10794   llvm::Value *Args[DoacrossFinArgs];
10795 
10796 public:
10797   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
10798                     ArrayRef<llvm::Value *> CallArgs)
10799       : RTLFn(RTLFn) {
10800     assert(CallArgs.size() == DoacrossFinArgs);
10801     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10802   }
10803   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
10804     if (!CGF.HaveInsertPoint())
10805       return;
10806     CGF.EmitRuntimeCall(RTLFn, Args);
10807   }
10808 };
10809 } // namespace
10810 
10811 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
10812                                        const OMPLoopDirective &D,
10813                                        ArrayRef<Expr *> NumIterations) {
10814   if (!CGF.HaveInsertPoint())
10815     return;
10816 
10817   ASTContext &C = CGM.getContext();
10818   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
10819   RecordDecl *RD;
10820   if (KmpDimTy.isNull()) {
10821     // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
10822     //  kmp_int64 lo; // lower
10823     //  kmp_int64 up; // upper
10824     //  kmp_int64 st; // stride
10825     // };
10826     RD = C.buildImplicitRecord("kmp_dim");
10827     RD->startDefinition();
10828     addFieldToRecordDecl(C, RD, Int64Ty);
10829     addFieldToRecordDecl(C, RD, Int64Ty);
10830     addFieldToRecordDecl(C, RD, Int64Ty);
10831     RD->completeDefinition();
10832     KmpDimTy = C.getRecordType(RD);
10833   } else {
10834     RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
10835   }
10836   llvm::APInt Size(/*numBits=*/32, NumIterations.size());
10837   QualType ArrayTy =
10838       C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);
10839 
10840   Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
10841   CGF.EmitNullInitialization(DimsAddr, ArrayTy);
10842   enum { LowerFD = 0, UpperFD, StrideFD };
10843   // Fill dims with data.
10844   for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
10845     LValue DimsLVal = CGF.MakeAddrLValue(
10846         CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
10847     // dims.upper = num_iterations;
10848     LValue UpperLVal = CGF.EmitLValueForField(
10849         DimsLVal, *std::next(RD->field_begin(), UpperFD));
10850     llvm::Value *NumIterVal =
10851         CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]),
10852                                  D.getNumIterations()->getType(), Int64Ty,
10853                                  D.getNumIterations()->getExprLoc());
10854     CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
10855     // dims.stride = 1;
10856     LValue StrideLVal = CGF.EmitLValueForField(
10857         DimsLVal, *std::next(RD->field_begin(), StrideFD));
10858     CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
10859                           StrideLVal);
10860   }
10861 
10862   // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
10863   // kmp_int32 num_dims, struct kmp_dim * dims);
10864   llvm::Value *Args[] = {
10865       emitUpdateLocation(CGF, D.getBeginLoc()),
10866       getThreadID(CGF, D.getBeginLoc()),
10867       llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
10868       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
10869           CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
10870           CGM.VoidPtrTy)};
10871 
10872   llvm::FunctionCallee RTLFn =
10873       createRuntimeFunction(OMPRTL__kmpc_doacross_init);
10874   CGF.EmitRuntimeCall(RTLFn, Args);
10875   llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
10876       emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
10877   llvm::FunctionCallee FiniRTLFn =
10878       createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
10879   CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
10880                                              llvm::makeArrayRef(FiniArgs));
10881 }
10882 
10883 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
10884                                           const OMPDependClause *C) {
10885   QualType Int64Ty =
10886       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
10887   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
10888   QualType ArrayTy = CGM.getContext().getConstantArrayType(
10889       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
10890   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
10891   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
10892     const Expr *CounterVal = C->getLoopData(I);
10893     assert(CounterVal);
10894     llvm::Value *CntVal = CGF.EmitScalarConversion(
10895         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
10896         CounterVal->getExprLoc());
10897     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
10898                           /*Volatile=*/false, Int64Ty);
10899   }
10900   llvm::Value *Args[] = {
10901       emitUpdateLocation(CGF, C->getBeginLoc()),
10902       getThreadID(CGF, C->getBeginLoc()),
10903       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
10904   llvm::FunctionCallee RTLFn;
10905   if (C->getDependencyKind() == OMPC_DEPEND_source) {
10906     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
10907   } else {
10908     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
10909     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
10910   }
10911   CGF.EmitRuntimeCall(RTLFn, Args);
10912 }
10913 
10914 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
10915                                llvm::FunctionCallee Callee,
10916                                ArrayRef<llvm::Value *> Args) const {
10917   assert(Loc.isValid() && "Outlined function call location must be valid.");
10918   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
10919 
10920   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
10921     if (Fn->doesNotThrow()) {
10922       CGF.EmitNounwindRuntimeCall(Fn, Args);
10923       return;
10924     }
10925   }
10926   CGF.EmitRuntimeCall(Callee, Args);
10927 }
10928 
/// Emits a call to the outlined function \p OutlinedFn with arguments \p Args
/// by forwarding all parameters unchanged to emitCall().
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
10934 
10935 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
10936   if (const auto *FD = dyn_cast<FunctionDecl>(D))
10937     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
10938       HasEmittedDeclareTargetRegion = true;
10939 }
10940 
/// Returns the address of the local variable \p NativeParam in \p CGF.
/// \p TargetParam is not used by this implementation.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
10946 
10947 namespace {
10948 /// Cleanup action for allocate support.
10949 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
10950 public:
10951   static const int CleanupArgs = 3;
10952 
10953 private:
10954   llvm::FunctionCallee RTLFn;
10955   llvm::Value *Args[CleanupArgs];
10956 
10957 public:
10958   OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
10959                        ArrayRef<llvm::Value *> CallArgs)
10960       : RTLFn(RTLFn) {
10961     assert(CallArgs.size() == CleanupArgs &&
10962            "Size of arguments does not match.");
10963     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10964   }
10965   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
10966     if (!CGF.HaveInsertPoint())
10967       return;
10968     CGF.EmitRuntimeCall(RTLFn, Args);
10969   }
10970 };
10971 } // namespace
10972 
10973 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
10974                                                    const VarDecl *VD) {
10975   if (!VD)
10976     return Address::invalid();
10977   const VarDecl *CVD = VD->getCanonicalDecl();
10978   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
10979     return Address::invalid();
10980   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
10981   // Use the default allocation.
10982   if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
10983       !AA->getAllocator())
10984     return Address::invalid();
10985   llvm::Value *Size;
10986   CharUnits Align = CGM.getContext().getDeclAlign(CVD);
10987   if (CVD->getType()->isVariablyModifiedType()) {
10988     Size = CGF.getTypeSize(CVD->getType());
10989     // Align the size: ((size + align - 1) / align) * align
10990     Size = CGF.Builder.CreateNUWAdd(
10991         Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
10992     Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
10993     Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
10994   } else {
10995     CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
10996     Size = CGM.getSize(Sz.alignTo(Align));
10997   }
10998   llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
10999   assert(AA->getAllocator() &&
11000          "Expected allocator expression for non-default allocator.");
11001   llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
11002   // According to the standard, the original allocator type is a enum (integer).
11003   // Convert to pointer type, if required.
11004   if (Allocator->getType()->isIntegerTy())
11005     Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
11006   else if (Allocator->getType()->isPointerTy())
11007     Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
11008                                                                 CGM.VoidPtrTy);
11009   llvm::Value *Args[] = {ThreadID, Size, Allocator};
11010 
11011   llvm::Value *Addr =
11012       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args,
11013                           getName({CVD->getName(), ".void.addr"}));
11014   llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
11015                                                               Allocator};
11016   llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);
11017 
11018   CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11019                                                 llvm::makeArrayRef(FiniArgs));
11020   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11021       Addr,
11022       CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
11023       getName({CVD->getName(), ".addr"}));
11024   return Address(Addr, Align);
11025 }
11026 
namespace {
/// Context selector record whose selector names are held as an array of
/// strings and whose score is an llvm::APSInt.
using OMPContextSelectorData =
    OpenMPCtxSelectorData<ArrayRef<StringRef>, llvm::APSInt>;
/// A sequence of context selector records.
using CompleteOMPContextSelectorData = SmallVector<OMPContextSelectorData, 4>;
} // anonymous namespace
11032 
/// Generic fallback for context selector checks.
///
/// Asserts that \p Data carries a known context selector set and a known
/// context selector, then reports no match. Selector-specific behaviour is
/// provided by explicit specializations of this template. \p Params are
/// ignored here.
/// \returns false always.
template <OpenMPContextSelectorSetKind CtxSet, OpenMPContextSelectorKind Ctx,
          typename... Arguments>
static bool checkContext(const OMPContextSelectorData &Data,
                         Arguments... Params) {
  assert(Data.CtxSet != OMP_CTX_SET_unknown && Data.Ctx != OMP_CTX_unknown &&
         "Unknown context selector or context selector set.");
  return false;
}
11042 
11043 /// Checks for implementation={vendor(<vendor>)} context selector.
11044 /// \returns true iff <vendor>="llvm", false otherwise.
11045 template <>
11046 bool checkContext<OMP_CTX_SET_implementation, OMP_CTX_vendor>(
11047     const OMPContextSelectorData &Data) {
11048   return llvm::all_of(Data.Names,
11049                       [](StringRef S) { return !S.compare_lower("llvm"); });
11050 }
11051 
11052 /// Checks for device={kind(<kind>)} context selector.
11053 /// \returns true if <kind>="host" and compilation is for host.
11054 /// true if <kind>="nohost" and compilation is for device.
11055 /// true if <kind>="cpu" and compilation is for Arm, X86 or PPC CPU.
11056 /// true if <kind>="gpu" and compilation is for NVPTX or AMDGCN.
11057 /// false otherwise.
11058 template <>
11059 bool checkContext<OMP_CTX_SET_device, OMP_CTX_kind, CodeGenModule &>(
11060     const OMPContextSelectorData &Data, CodeGenModule &CGM) {
11061   for (StringRef Name : Data.Names) {
11062     if (!Name.compare_lower("host")) {
11063       if (CGM.getLangOpts().OpenMPIsDevice)
11064         return false;
11065       continue;
11066     }
11067     if (!Name.compare_lower("nohost")) {
11068       if (!CGM.getLangOpts().OpenMPIsDevice)
11069         return false;
11070       continue;
11071     }
11072     switch (CGM.getTriple().getArch()) {
11073     case llvm::Triple::arm:
11074     case llvm::Triple::armeb:
11075     case llvm::Triple::aarch64:
11076     case llvm::Triple::aarch64_be:
11077     case llvm::Triple::aarch64_32:
11078     case llvm::Triple::ppc:
11079     case llvm::Triple::ppc64:
11080     case llvm::Triple::ppc64le:
11081     case llvm::Triple::x86:
11082     case llvm::Triple::x86_64:
11083       if (Name.compare_lower("cpu"))
11084         return false;
11085       break;
11086     case llvm::Triple::amdgcn:
11087     case llvm::Triple::nvptx:
11088     case llvm::Triple::nvptx64:
11089       if (Name.compare_lower("gpu"))
11090         return false;
11091       break;
11092     case llvm::Triple::UnknownArch:
11093     case llvm::Triple::arc:
11094     case llvm::Triple::avr:
11095     case llvm::Triple::bpfel:
11096     case llvm::Triple::bpfeb:
11097     case llvm::Triple::hexagon:
11098     case llvm::Triple::mips:
11099     case llvm::Triple::mipsel:
11100     case llvm::Triple::mips64:
11101     case llvm::Triple::mips64el:
11102     case llvm::Triple::msp430:
11103     case llvm::Triple::r600:
11104     case llvm::Triple::riscv32:
11105     case llvm::Triple::riscv64:
11106     case llvm::Triple::sparc:
11107     case llvm::Triple::sparcv9:
11108     case llvm::Triple::sparcel:
11109     case llvm::Triple::systemz:
11110     case llvm::Triple::tce:
11111     case llvm::Triple::tcele:
11112     case llvm::Triple::thumb:
11113     case llvm::Triple::thumbeb:
11114     case llvm::Triple::xcore:
11115     case llvm::Triple::le32:
11116     case llvm::Triple::le64:
11117     case llvm::Triple::amdil:
11118     case llvm::Triple::amdil64:
11119     case llvm::Triple::hsail:
11120     case llvm::Triple::hsail64:
11121     case llvm::Triple::spir:
11122     case llvm::Triple::spir64:
11123     case llvm::Triple::kalimba:
11124     case llvm::Triple::shave:
11125     case llvm::Triple::lanai:
11126     case llvm::Triple::wasm32:
11127     case llvm::Triple::wasm64:
11128     case llvm::Triple::renderscript32:
11129     case llvm::Triple::renderscript64:
11130     case llvm::Triple::ve:
11131       return false;
11132     }
11133   }
11134   return true;
11135 }
11136 
11137 static bool matchesContext(CodeGenModule &CGM,
11138                            const CompleteOMPContextSelectorData &ContextData) {
11139   for (const OMPContextSelectorData &Data : ContextData) {
11140     switch (Data.Ctx) {
11141     case OMP_CTX_vendor:
11142       assert(Data.CtxSet == OMP_CTX_SET_implementation &&
11143              "Expected implementation context selector set.");
11144       if (!checkContext<OMP_CTX_SET_implementation, OMP_CTX_vendor>(Data))
11145         return false;
11146       break;
11147     case OMP_CTX_kind:
11148       assert(Data.CtxSet == OMP_CTX_SET_device &&
11149              "Expected device context selector set.");
11150       if (!checkContext<OMP_CTX_SET_device, OMP_CTX_kind, CodeGenModule &>(Data,
11151                                                                            CGM))
11152         return false;
11153       break;
11154     case OMP_CTX_unknown:
11155       llvm_unreachable("Unknown context selector kind.");
11156     }
11157   }
11158   return true;
11159 }
11160 
11161 static CompleteOMPContextSelectorData
11162 translateAttrToContextSelectorData(ASTContext &C,
11163                                    const OMPDeclareVariantAttr *A) {
11164   CompleteOMPContextSelectorData Data;
11165   for (unsigned I = 0, E = A->scores_size(); I < E; ++I) {
11166     Data.emplace_back();
11167     auto CtxSet = static_cast<OpenMPContextSelectorSetKind>(
11168         *std::next(A->ctxSelectorSets_begin(), I));
11169     auto Ctx = static_cast<OpenMPContextSelectorKind>(
11170         *std::next(A->ctxSelectors_begin(), I));
11171     Data.back().CtxSet = CtxSet;
11172     Data.back().Ctx = Ctx;
11173     const Expr *Score = *std::next(A->scores_begin(), I);
11174     Data.back().Score = Score->EvaluateKnownConstInt(C);
11175     switch (Ctx) {
11176     case OMP_CTX_vendor:
11177       assert(CtxSet == OMP_CTX_SET_implementation &&
11178              "Expected implementation context selector set.");
11179       Data.back().Names =
11180           llvm::makeArrayRef(A->implVendors_begin(), A->implVendors_end());
11181       break;
11182     case OMP_CTX_kind:
11183       assert(CtxSet == OMP_CTX_SET_device &&
11184              "Expected device context selector set.");
11185       Data.back().Names =
11186           llvm::makeArrayRef(A->deviceKinds_begin(), A->deviceKinds_end());
11187       break;
11188     case OMP_CTX_unknown:
11189       llvm_unreachable("Unknown context selector kind.");
11190     }
11191   }
11192   return Data;
11193 }
11194 
11195 static bool isStrictSubset(const CompleteOMPContextSelectorData &LHS,
11196                            const CompleteOMPContextSelectorData &RHS) {
11197   llvm::SmallDenseMap<std::pair<int, int>, llvm::StringSet<>, 4> RHSData;
11198   for (const OMPContextSelectorData &D : RHS) {
11199     auto &Pair = RHSData.FindAndConstruct(std::make_pair(D.CtxSet, D.Ctx));
11200     Pair.getSecond().insert(D.Names.begin(), D.Names.end());
11201   }
11202   bool AllSetsAreEqual = true;
11203   for (const OMPContextSelectorData &D : LHS) {
11204     auto It = RHSData.find(std::make_pair(D.CtxSet, D.Ctx));
11205     if (It == RHSData.end())
11206       return false;
11207     if (D.Names.size() > It->getSecond().size())
11208       return false;
11209     if (llvm::set_union(It->getSecond(), D.Names))
11210       return false;
11211     AllSetsAreEqual =
11212         AllSetsAreEqual && (D.Names.size() == It->getSecond().size());
11213   }
11214 
11215   return LHS.size() != RHS.size() || !AllSetsAreEqual;
11216 }
11217 
11218 static bool greaterCtxScore(const CompleteOMPContextSelectorData &LHS,
11219                             const CompleteOMPContextSelectorData &RHS) {
11220   // Score is calculated as sum of all scores + 1.
11221   llvm::APSInt LHSScore(llvm::APInt(64, 1), /*isUnsigned=*/false);
11222   bool RHSIsSubsetOfLHS = isStrictSubset(RHS, LHS);
11223   if (RHSIsSubsetOfLHS) {
11224     LHSScore = llvm::APSInt::get(0);
11225   } else {
11226     for (const OMPContextSelectorData &Data : LHS) {
11227       if (Data.Score.getBitWidth() > LHSScore.getBitWidth()) {
11228         LHSScore = LHSScore.extend(Data.Score.getBitWidth()) + Data.Score;
11229       } else if (Data.Score.getBitWidth() < LHSScore.getBitWidth()) {
11230         LHSScore += Data.Score.extend(LHSScore.getBitWidth());
11231       } else {
11232         LHSScore += Data.Score;
11233       }
11234     }
11235   }
11236   llvm::APSInt RHSScore(llvm::APInt(64, 1), /*isUnsigned=*/false);
11237   if (!RHSIsSubsetOfLHS && isStrictSubset(LHS, RHS)) {
11238     RHSScore = llvm::APSInt::get(0);
11239   } else {
11240     for (const OMPContextSelectorData &Data : RHS) {
11241       if (Data.Score.getBitWidth() > RHSScore.getBitWidth()) {
11242         RHSScore = RHSScore.extend(Data.Score.getBitWidth()) + Data.Score;
11243       } else if (Data.Score.getBitWidth() < RHSScore.getBitWidth()) {
11244         RHSScore += Data.Score.extend(RHSScore.getBitWidth());
11245       } else {
11246         RHSScore += Data.Score;
11247       }
11248     }
11249   }
11250   return llvm::APSInt::compareValues(LHSScore, RHSScore) >= 0;
11251 }
11252 
11253 /// Finds the variant function that matches current context with its context
11254 /// selector.
11255 static const FunctionDecl *getDeclareVariantFunction(CodeGenModule &CGM,
11256                                                      const FunctionDecl *FD) {
11257   if (!FD->hasAttrs() || !FD->hasAttr<OMPDeclareVariantAttr>())
11258     return FD;
11259   // Iterate through all DeclareVariant attributes and check context selectors.
11260   const OMPDeclareVariantAttr *TopMostAttr = nullptr;
11261   CompleteOMPContextSelectorData TopMostData;
11262   for (const auto *A : FD->specific_attrs<OMPDeclareVariantAttr>()) {
11263     CompleteOMPContextSelectorData Data =
11264         translateAttrToContextSelectorData(CGM.getContext(), A);
11265     if (!matchesContext(CGM, Data))
11266       continue;
11267     // If the attribute matches the context, find the attribute with the highest
11268     // score.
11269     if (!TopMostAttr || !greaterCtxScore(TopMostData, Data)) {
11270       TopMostAttr = A;
11271       TopMostData.swap(Data);
11272     }
11273   }
11274   if (!TopMostAttr)
11275     return FD;
11276   return cast<FunctionDecl>(
11277       cast<DeclRefExpr>(TopMostAttr->getVariantFuncRef()->IgnoreParenImpCasts())
11278           ->getDecl());
11279 }
11280 
11281 bool CGOpenMPRuntime::emitDeclareVariant(GlobalDecl GD, bool IsForDefinition) {
11282   const auto *D = cast<FunctionDecl>(GD.getDecl());
11283   // If the original function is defined already, use its definition.
11284   StringRef MangledName = CGM.getMangledName(GD);
11285   llvm::GlobalValue *Orig = CGM.GetGlobalValue(MangledName);
11286   if (Orig && !Orig->isDeclaration())
11287     return false;
11288   const FunctionDecl *NewFD = getDeclareVariantFunction(CGM, D);
11289   // Emit original function if it does not have declare variant attribute or the
11290   // context does not match.
11291   if (NewFD == D)
11292     return false;
11293   GlobalDecl NewGD = GD.getWithDecl(NewFD);
11294   if (tryEmitDeclareVariant(NewGD, GD, Orig, IsForDefinition)) {
11295     DeferredVariantFunction.erase(D);
11296     return true;
11297   }
11298   DeferredVariantFunction.insert(std::make_pair(D, std::make_pair(NewGD, GD)));
11299   return true;
11300 }
11301 
11302 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11303     CodeGenModule &CGM, const OMPLoopDirective &S)
11304     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11305   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11306   if (!NeedToPush)
11307     return;
11308   NontemporalDeclsSet &DS =
11309       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11310   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11311     for (const Stmt *Ref : C->private_refs()) {
11312       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11313       const ValueDecl *VD;
11314       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11315         VD = DRE->getDecl();
11316       } else {
11317         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11318         assert((ME->isImplicitCXXThis() ||
11319                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11320                "Expected member of current class.");
11321         VD = ME->getMemberDecl();
11322       }
11323       DS.insert(VD);
11324     }
11325   }
11326 }
11327 
11328 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11329   if (!NeedToPush)
11330     return;
11331   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11332 }
11333 
11334 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11335   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11336 
11337   return llvm::any_of(
11338       CGM.getOpenMPRuntime().NontemporalDeclsStack,
11339       [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
11340 }
11341 
11342 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11343     const OMPExecutableDirective &S,
11344     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11345     const {
11346   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11347   // Vars in target/task regions must be excluded completely.
11348   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11349       isOpenMPTaskingDirective(S.getDirectiveKind())) {
11350     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11351     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11352     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11353     for (const CapturedStmt::Capture &Cap : CS->captures()) {
11354       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11355         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11356     }
11357   }
11358   // Exclude vars in private clauses.
11359   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11360     for (const Expr *Ref : C->varlists()) {
11361       if (!Ref->getType()->isScalarType())
11362         continue;
11363       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11364       if (!DRE)
11365         continue;
11366       NeedToCheckForLPCs.insert(DRE->getDecl());
11367     }
11368   }
11369   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11370     for (const Expr *Ref : C->varlists()) {
11371       if (!Ref->getType()->isScalarType())
11372         continue;
11373       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11374       if (!DRE)
11375         continue;
11376       NeedToCheckForLPCs.insert(DRE->getDecl());
11377     }
11378   }
11379   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11380     for (const Expr *Ref : C->varlists()) {
11381       if (!Ref->getType()->isScalarType())
11382         continue;
11383       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11384       if (!DRE)
11385         continue;
11386       NeedToCheckForLPCs.insert(DRE->getDecl());
11387     }
11388   }
11389   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11390     for (const Expr *Ref : C->varlists()) {
11391       if (!Ref->getType()->isScalarType())
11392         continue;
11393       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11394       if (!DRE)
11395         continue;
11396       NeedToCheckForLPCs.insert(DRE->getDecl());
11397     }
11398   }
11399   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11400     for (const Expr *Ref : C->varlists()) {
11401       if (!Ref->getType()->isScalarType())
11402         continue;
11403       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11404       if (!DRE)
11405         continue;
11406       NeedToCheckForLPCs.insert(DRE->getDecl());
11407     }
11408   }
11409   for (const Decl *VD : NeedToCheckForLPCs) {
11410     for (const LastprivateConditionalData &Data :
11411          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
11412       if (Data.DeclToUniqueName.count(VD) > 0) {
11413         if (!Data.Disabled)
11414           NeedToAddForLPCsAsDisabled.insert(VD);
11415         break;
11416       }
11417     }
11418   }
11419 }
11420 
11421 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11422     CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
11423     : CGM(CGF.CGM),
11424       Action((CGM.getLangOpts().OpenMP >= 50 &&
11425               llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
11426                            [](const OMPLastprivateClause *C) {
11427                              return C->getKind() ==
11428                                     OMPC_LASTPRIVATE_conditional;
11429                            }))
11430                  ? ActionToDo::PushAsLastprivateConditional
11431                  : ActionToDo::DoNotPush) {
11432   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11433   if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
11434     return;
11435   assert(Action == ActionToDo::PushAsLastprivateConditional &&
11436          "Expected a push action.");
11437   LastprivateConditionalData &Data =
11438       CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11439   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11440     if (C->getKind() != OMPC_LASTPRIVATE_conditional)
11441       continue;
11442 
11443     for (const Expr *Ref : C->varlists()) {
11444       Data.DeclToUniqueName.insert(std::make_pair(
11445           cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
11446           SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
11447     }
11448   }
11449   Data.IVLVal = IVLVal;
11450   Data.Fn = CGF.CurFn;
11451 }
11452 
11453 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11454     CodeGenFunction &CGF, const OMPExecutableDirective &S)
11455     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
11456   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11457   if (CGM.getLangOpts().OpenMP < 50)
11458     return;
11459   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
11460   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
11461   if (!NeedToAddForLPCsAsDisabled.empty()) {
11462     Action = ActionToDo::DisableLastprivateConditional;
11463     LastprivateConditionalData &Data =
11464         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11465     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
11466       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
11467     Data.Fn = CGF.CurFn;
11468     Data.Disabled = true;
11469   }
11470 }
11471 
/// Factory for the "disabling" form of the scope object: returns an object
/// built with the two-argument (CGF, S) constructor.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
11477 
11478 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
11479   if (CGM.getLangOpts().OpenMP < 50)
11480     return;
11481   if (Action == ActionToDo::DisableLastprivateConditional) {
11482     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11483            "Expected list of disabled private vars.");
11484     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11485   }
11486   if (Action == ActionToDo::PushAsLastprivateConditional) {
11487     assert(
11488         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11489         "Expected list of lastprivate conditional vars.");
11490     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11491   }
11492 }
11493 
11494 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
11495                                                         const VarDecl *VD) {
11496   ASTContext &C = CGM.getContext();
11497   auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
11498   if (I == LastprivateConditionalToTypes.end())
11499     I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
11500   QualType NewType;
11501   const FieldDecl *VDField;
11502   const FieldDecl *FiredField;
11503   LValue BaseLVal;
11504   auto VI = I->getSecond().find(VD);
11505   if (VI == I->getSecond().end()) {
11506     RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
11507     RD->startDefinition();
11508     VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
11509     FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
11510     RD->completeDefinition();
11511     NewType = C.getRecordType(RD);
11512     Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
11513     BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
11514     I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
11515   } else {
11516     NewType = std::get<0>(VI->getSecond());
11517     VDField = std::get<1>(VI->getSecond());
11518     FiredField = std::get<2>(VI->getSecond());
11519     BaseLVal = std::get<3>(VI->getSecond());
11520   }
11521   LValue FiredLVal =
11522       CGF.EmitLValueForField(BaseLVal, FiredField);
11523   CGF.EmitStoreOfScalar(
11524       llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
11525       FiredLVal);
11526   return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
11527 }
11528 
11529 namespace {
11530 /// Checks if the lastprivate conditional variable is referenced in LHS.
11531 class LastprivateConditionalRefChecker final
11532     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
11533   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
11534   const Expr *FoundE = nullptr;
11535   const Decl *FoundD = nullptr;
11536   StringRef UniqueDeclName;
11537   LValue IVLVal;
11538   llvm::Function *FoundFn = nullptr;
11539   SourceLocation Loc;
11540 
11541 public:
11542   bool VisitDeclRefExpr(const DeclRefExpr *E) {
11543     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11544          llvm::reverse(LPM)) {
11545       auto It = D.DeclToUniqueName.find(E->getDecl());
11546       if (It == D.DeclToUniqueName.end())
11547         continue;
11548       if (D.Disabled)
11549         return false;
11550       FoundE = E;
11551       FoundD = E->getDecl()->getCanonicalDecl();
11552       UniqueDeclName = It->second;
11553       IVLVal = D.IVLVal;
11554       FoundFn = D.Fn;
11555       break;
11556     }
11557     return FoundE == E;
11558   }
11559   bool VisitMemberExpr(const MemberExpr *E) {
11560     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
11561       return false;
11562     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11563          llvm::reverse(LPM)) {
11564       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
11565       if (It == D.DeclToUniqueName.end())
11566         continue;
11567       if (D.Disabled)
11568         return false;
11569       FoundE = E;
11570       FoundD = E->getMemberDecl()->getCanonicalDecl();
11571       UniqueDeclName = It->second;
11572       IVLVal = D.IVLVal;
11573       FoundFn = D.Fn;
11574       break;
11575     }
11576     return FoundE == E;
11577   }
11578   bool VisitStmt(const Stmt *S) {
11579     for (const Stmt *Child : S->children()) {
11580       if (!Child)
11581         continue;
11582       if (const auto *E = dyn_cast<Expr>(Child))
11583         if (!E->isGLValue())
11584           continue;
11585       if (Visit(Child))
11586         return true;
11587     }
11588     return false;
11589   }
11590   explicit LastprivateConditionalRefChecker(
11591       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
11592       : LPM(LPM) {}
11593   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
11594   getFoundData() const {
11595     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
11596   }
11597 };
11598 } // namespace
11599 
/// Emits the guarded update of the global "last value" copy of a lastprivate
/// conditional variable.
///
/// Two internal variables are obtained (or created): one named from
/// \p UniqueDeclName plus "iv" holding the last recorded loop counter, and one
/// named \p UniqueDeclName holding the last recorded value. The emitted code
/// compares the stored counter against the current value of \p IVLVal (signed
/// or unsigned comparison according to its type) and, if the stored counter is
/// not greater, stores both the counter and the value loaded from \p LVal.
/// Unless the OpenMPSimd language option is set, the sequence is wrapped in a
/// critical region named \p UniqueDeclName. \p Loc is used for the emitted
/// loads and for the critical region.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    // NOTE(review): the object returned by CreateEmpty() is discarded at the
    // end of this statement and the branch above has already been emitted; if
    // it is a scoped guard it has no lasting effect here (the sibling code in
    // checkAndEmitSharedLastprivateConditional binds its guard to a named
    // local). Confirm whether this is intentional.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
11686 
11687 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
11688                                                          const Expr *LHS) {
11689   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11690     return;
11691   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
11692   if (!Checker.Visit(LHS))
11693     return;
11694   const Expr *FoundE;
11695   const Decl *FoundD;
11696   StringRef UniqueDeclName;
11697   LValue IVLVal;
11698   llvm::Function *FoundFn;
11699   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
11700       Checker.getFoundData();
11701   if (FoundFn != CGF.CurFn) {
11702     // Special codegen for inner parallel regions.
11703     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
11704     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
11705     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
11706            "Lastprivate conditional is not found in outer region.");
11707     QualType StructTy = std::get<0>(It->getSecond());
11708     const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
11709     LValue PrivLVal = CGF.EmitLValue(FoundE);
11710     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11711         PrivLVal.getAddress(CGF),
11712         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
11713     LValue BaseLVal =
11714         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
11715     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
11716     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
11717                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
11718                         FiredLVal, llvm::AtomicOrdering::Unordered,
11719                         /*IsVolatile=*/true, /*isInit=*/false);
11720     return;
11721   }
11722 
11723   // Private address of the lastprivate conditional in the current context.
11724   // priv_a
11725   LValue LVal = CGF.EmitLValue(FoundE);
11726   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
11727                                    FoundE->getExprLoc());
11728 }
11729 
11730 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
11731     CodeGenFunction &CGF, const OMPExecutableDirective &D,
11732     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
11733   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11734     return;
11735   auto Range = llvm::reverse(LastprivateConditionalStack);
11736   auto It = llvm::find_if(
11737       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
11738   if (It == Range.end() || It->Fn != CGF.CurFn)
11739     return;
11740   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
11741   assert(LPCI != LastprivateConditionalToTypes.end() &&
11742          "Lastprivates must be registered already.");
11743   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11744   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
11745   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
11746   for (const auto &Pair : It->DeclToUniqueName) {
11747     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
11748     if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
11749       continue;
11750     auto I = LPCI->getSecond().find(Pair.first);
11751     assert(I != LPCI->getSecond().end() &&
11752            "Lastprivate must be rehistered already.");
11753     // bool Cmp = priv_a.Fired != 0;
11754     LValue BaseLVal = std::get<3>(I->getSecond());
11755     LValue FiredLVal =
11756         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
11757     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
11758     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
11759     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
11760     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
11761     // if (Cmp) {
11762     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
11763     CGF.EmitBlock(ThenBB);
11764     Address Addr = CGF.GetAddrOfLocalVar(VD);
11765     LValue LVal;
11766     if (VD->getType()->isReferenceType())
11767       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
11768                                            AlignmentSource::Decl);
11769     else
11770       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
11771                                 AlignmentSource::Decl);
11772     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
11773                                      D.getBeginLoc());
11774     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
11775     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
11776     // }
11777   }
11778 }
11779 
11780 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
11781     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
11782     SourceLocation Loc) {
11783   if (CGF.getLangOpts().OpenMP < 50)
11784     return;
11785   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
11786   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
11787          "Unknown lastprivate conditional variable.");
11788   StringRef UniqueName = It->second;
11789   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
11790   // The variable was not updated in the region - exit.
11791   if (!GV)
11792     return;
11793   LValue LPLVal = CGF.MakeAddrLValue(
11794       GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
11795   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
11796   CGF.EmitStoreOfScalar(Res, PrivLVal);
11797 }
11798 
/// SIMD-only runtime stub: outlining a parallel region is not supported.
/// All parameters are ignored; any call hits llvm_unreachable.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11804 
/// SIMD-only runtime stub: outlining a teams region is not supported.
/// All parameters are ignored; any call hits llvm_unreachable.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11810 
/// SIMD-only runtime stub: outlining a task region is not supported.
/// All parameters are ignored (\p NumberOfParts is not written); any call
/// hits llvm_unreachable.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11818 
/// SIMD-only runtime stub: emitting a call to an outlined parallel function
/// is not supported. All parameters are ignored; any call hits
/// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11826 
/// SIMD-only runtime stub: critical regions are not supported.
/// All parameters are ignored; any call hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11833 
/// SIMD-only runtime stub: master regions are not supported.
/// All parameters are ignored; any call hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11839 
/// SIMD-only runtime stub: taskyield calls are not supported.
/// All parameters are ignored; any call hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11844 
/// SIMD-only runtime stub: taskgroup regions are not supported.
/// All parameters are ignored; any call hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11850 
/// SIMD-only runtime stub: single regions (including the copyprivate
/// expression lists passed here) are not supported. All parameters are
/// ignored; any call hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11858 
/// SIMD-only runtime stub: ordered regions are not supported.
/// All parameters are ignored; any call hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11865 
/// SIMD-only runtime stub: barrier calls are not supported.
/// All parameters are ignored; any call hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11873 
/// SIMD-only runtime stub: dispatch-schedule loop initialization is not
/// supported. All parameters are ignored; any call hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11880 
/// SIMD-only runtime stub: static-schedule loop initialization is not
/// supported. All parameters are ignored; any call hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11886 
/// SIMD-only runtime stub: static initialization for 'distribute' loops is
/// not supported. All parameters are ignored; any call hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11892 
/// SIMD-only runtime stub: ordered iteration-end notification is not
/// supported. All parameters are ignored; any call hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11899 
/// SIMD-only runtime stub: static-schedule loop finalization is not
/// supported. All parameters are ignored; any call hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11905 
/// SIMD-only runtime stub: fetching the next loop chunk is not supported.
/// All parameters are ignored and no value is ever returned; any call hits
/// llvm_unreachable.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11913 
/// SIMD-only runtime stub: the num_threads clause is not supported.
/// All parameters are ignored; any call hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11919 
/// SIMD-only runtime stub: the proc_bind clause is not supported.
/// All parameters are ignored; any call hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11925 
/// SIMD-only runtime stub: obtaining the address of a threadprivate variable
/// is not supported. All parameters are ignored and no address is ever
/// returned; any call hits llvm_unreachable.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11932 
/// SIMD-only runtime stub: emitting a threadprivate variable definition is
/// not supported. All parameters are ignored; any call hits llvm_unreachable.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11938 
/// SIMD-only runtime stub: artificial threadprivate variables are not
/// supported. All parameters are ignored; any call hits llvm_unreachable.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11943 
/// SIMD-only runtime stub: flush is not supported.
/// All parameters are ignored; any call hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11950 
/// SIMD-only runtime stub: emitting a task call is not supported.
/// All parameters are ignored; any call hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11959 
/// SIMD-only runtime stub: emitting a taskloop call is not supported.
/// All parameters are ignored; any call hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11966 
/// SIMD-only runtime reduction: asserts that \p Options.SimpleReduction is
/// set, then forwards every argument unchanged to
/// CGOpenMPRuntime::emitReduction. In builds where assertions are compiled
/// out the forwarding happens unconditionally.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
11975 
/// SIMD-only runtime stub: task reduction initialization is not supported.
/// All parameters are ignored; any call hits llvm_unreachable.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11981 
/// SIMD-only runtime stub: task reduction fixups are not supported.
/// All parameters are ignored; any call hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11988 
/// SIMD-only runtime stub: looking up a task reduction item is not supported.
/// All parameters are ignored and no address is ever returned; any call hits
/// llvm_unreachable.
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11995 
/// SIMD-only runtime stub: taskwait calls are not supported.
/// All parameters are ignored; any call hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12000 
/// SIMD-only runtime stub: cancellation points are not supported.
/// All parameters are ignored; any call hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12006 
/// SIMD-only runtime stub: cancel calls are not supported.
/// All parameters are ignored; any call hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12012 
/// SIMD-only runtime stub: outlining a target region is not supported.
/// All parameters are ignored (the \p OutlinedFn and \p OutlinedFnID outputs
/// are not written); any call hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12019 
/// SIMD-only runtime stub: emitting a target call is not supported.
/// All parameters are ignored (\p SizeEmitter is never invoked); any call
/// hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    const Expr *Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12029 
/// SIMD-only runtime stub: emitting target functions is not supported.
/// \p GD is ignored and no value is ever returned; any call hits
/// llvm_unreachable.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12033 
/// SIMD-only runtime stub: emitting target global variables is not supported.
/// \p GD is ignored and no value is ever returned; any call hits
/// llvm_unreachable.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12037 
/// SIMD-only runtime: always returns false without inspecting \p GD. Unlike
/// the neighbouring stubs, calling this function is allowed.
/// NOTE(review): presumably false tells the caller the declaration was not
/// handled as a target global - confirm against the base-class contract.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
12041 
/// SIMD-only stub for emitTeamsCall. OutlinedFn and CapturedVars are never
/// touched and no call is emitted; the body is a single llvm_unreachable, so
/// this entry point is expected never to be reached in SIMD-only mode.
12042 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
12043                                         const OMPExecutableDirective &D,
12044                                         SourceLocation Loc,
12045                                         llvm::Function *OutlinedFn,
12046                                         ArrayRef<llvm::Value *> CapturedVars) {
12047   llvm_unreachable("Not supported in SIMD-only mode");
12048 }
12049 
/// SIMD-only stub for emitNumTeamsClause. Neither the NumTeams nor the
/// ThreadLimit expression is evaluated; the function consists solely of
/// llvm_unreachable and must not be called in SIMD-only mode.
12050 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
12051                                              const Expr *NumTeams,
12052                                              const Expr *ThreadLimit,
12053                                              SourceLocation Loc) {
12054   llvm_unreachable("Not supported in SIMD-only mode");
12055 }
12056 
/// SIMD-only stub for emitTargetDataCalls. CodeGen is never run and Info is
/// never modified; all other arguments are ignored as well. The only statement
/// is llvm_unreachable, so reaching this function indicates a caller bug.
12057 void CGOpenMPSIMDRuntime::emitTargetDataCalls(
12058     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12059     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
12060   llvm_unreachable("Not supported in SIMD-only mode");
12061 }
12062 
/// SIMD-only stub for emitTargetDataStandAloneCall. The directive, if-condition
/// and device expressions are all unused and nothing is emitted; the body is
/// just llvm_unreachable, i.e. this is not expected to be called in SIMD-only
/// mode.
12063 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
12064     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12065     const Expr *Device) {
12066   llvm_unreachable("Not supported in SIMD-only mode");
12067 }
12068 
/// SIMD-only stub for emitDoacrossInit. The loop directive and the
/// NumIterations expressions are ignored and no code is emitted; the function
/// only marks the path unreachable and must not be called in SIMD-only mode.
12069 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
12070                                            const OMPLoopDirective &D,
12071                                            ArrayRef<Expr *> NumIterations) {
12072   llvm_unreachable("Not supported in SIMD-only mode");
12073 }
12074 
/// SIMD-only stub for emitDoacrossOrdered. The depend clause C is never read;
/// the body is a single llvm_unreachable, so this entry point is expected never
/// to be reached in SIMD-only mode.
12075 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12076                                               const OMPDependClause *C) {
12077   llvm_unreachable("Not supported in SIMD-only mode");
12078 }
12079 
/// SIMD-only stub for translateParameter. FD and NativeParam are ignored and,
/// despite the `const VarDecl *` return type, no pointer is ever returned: the
/// sole statement is llvm_unreachable, so a call here is a caller bug.
12080 const VarDecl *
12081 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
12082                                         const VarDecl *NativeParam) const {
12083   llvm_unreachable("Not supported in SIMD-only mode");
12084 }
12085 
/// SIMD-only stub for getParameterAddress. NativeParam and TargetParam are
/// unused and no Address is ever constructed or returned; the body consists
/// only of llvm_unreachable, so this must not be called in SIMD-only mode.
12086 Address
12087 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
12088                                          const VarDecl *NativeParam,
12089                                          const VarDecl *TargetParam) const {
12090   llvm_unreachable("Not supported in SIMD-only mode");
12091 }
12092