1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/Attr.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/OpenMPClause.h"
21 #include "clang/AST/StmtOpenMP.h"
22 #include "clang/AST/StmtVisitor.h"
23 #include "clang/Basic/BitmaskEnum.h"
24 #include "clang/Basic/OpenMPKinds.h"
25 #include "clang/Basic/SourceManager.h"
26 #include "clang/CodeGen/ConstantInitBuilder.h"
27 #include "llvm/ADT/ArrayRef.h"
28 #include "llvm/ADT/SetOperations.h"
29 #include "llvm/ADT/StringExtras.h"
30 #include "llvm/Bitcode/BitcodeReader.h"
31 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
32 #include "llvm/IR/Constants.h"
33 #include "llvm/IR/DerivedTypes.h"
34 #include "llvm/IR/GlobalValue.h"
35 #include "llvm/IR/Value.h"
36 #include "llvm/Support/AtomicOrdering.h"
37 #include "llvm/Support/Format.h"
38 #include "llvm/Support/raw_ostream.h"
39 #include <cassert>
40 
41 using namespace clang;
42 using namespace CodeGen;
43 using namespace llvm::omp;
44 
45 namespace {
46 /// Base class for handling code generation inside OpenMP regions.
47 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
48 public:
49   /// Kinds of OpenMP regions used in codegen.
50   enum CGOpenMPRegionKind {
51     /// Region with outlined function for standalone 'parallel'
52     /// directive.
53     ParallelOutlinedRegion,
54     /// Region with outlined function for standalone 'task' directive.
55     TaskOutlinedRegion,
56     /// Region for constructs that do not require function outlining,
57     /// like 'for', 'sections', 'atomic' etc. directives.
58     InlinedRegion,
59     /// Region with outlined function for standalone 'target' directive.
60     TargetRegion,
61   };
62 
63   CGOpenMPRegionInfo(const CapturedStmt &CS,
64                      const CGOpenMPRegionKind RegionKind,
65                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
66                      bool HasCancel)
67       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
68         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
69 
70   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
71                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
72                      bool HasCancel)
73       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
74         Kind(Kind), HasCancel(HasCancel) {}
75 
76   /// Get a variable or parameter for storing global thread id
77   /// inside OpenMP construct.
78   virtual const VarDecl *getThreadIDVariable() const = 0;
79 
80   /// Emit the captured statement body.
81   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
82 
83   /// Get an LValue for the current ThreadID variable.
84   /// \return LValue for thread id variable. This LValue always has type int32*.
85   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
86 
87   virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
88 
89   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
90 
91   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
92 
93   bool hasCancel() const { return HasCancel; }
94 
95   static bool classof(const CGCapturedStmtInfo *Info) {
96     return Info->getKind() == CR_OpenMP;
97   }
98 
99   ~CGOpenMPRegionInfo() override = default;
100 
101 protected:
102   CGOpenMPRegionKind RegionKind;
103   RegionCodeGenTy CodeGen;
104   OpenMPDirectiveKind Kind;
105   bool HasCancel;
106 };
107 
108 /// API for captured statement code generation in OpenMP constructs.
109 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
110 public:
111   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
112                              const RegionCodeGenTy &CodeGen,
113                              OpenMPDirectiveKind Kind, bool HasCancel,
114                              StringRef HelperName)
115       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
116                            HasCancel),
117         ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
118     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
119   }
120 
121   /// Get a variable or parameter for storing global thread id
122   /// inside OpenMP construct.
123   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
124 
125   /// Get the name of the capture helper.
126   StringRef getHelperName() const override { return HelperName; }
127 
128   static bool classof(const CGCapturedStmtInfo *Info) {
129     return CGOpenMPRegionInfo::classof(Info) &&
130            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
131                ParallelOutlinedRegion;
132   }
133 
134 private:
135   /// A variable or parameter storing global thread id for OpenMP
136   /// constructs.
137   const VarDecl *ThreadIDVar;
138   StringRef HelperName;
139 };
140 
141 /// API for captured statement code generation in OpenMP constructs.
142 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
143 public:
144   class UntiedTaskActionTy final : public PrePostActionTy {
145     bool Untied;
146     const VarDecl *PartIDVar;
147     const RegionCodeGenTy UntiedCodeGen;
148     llvm::SwitchInst *UntiedSwitch = nullptr;
149 
150   public:
151     UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
152                        const RegionCodeGenTy &UntiedCodeGen)
153         : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
154     void Enter(CodeGenFunction &CGF) override {
155       if (Untied) {
156         // Emit task switching point.
157         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
158             CGF.GetAddrOfLocalVar(PartIDVar),
159             PartIDVar->getType()->castAs<PointerType>());
160         llvm::Value *Res =
161             CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
162         llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
163         UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
164         CGF.EmitBlock(DoneBB);
165         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
166         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
167         UntiedSwitch->addCase(CGF.Builder.getInt32(0),
168                               CGF.Builder.GetInsertBlock());
169         emitUntiedSwitch(CGF);
170       }
171     }
172     void emitUntiedSwitch(CodeGenFunction &CGF) const {
173       if (Untied) {
174         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
175             CGF.GetAddrOfLocalVar(PartIDVar),
176             PartIDVar->getType()->castAs<PointerType>());
177         CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
178                               PartIdLVal);
179         UntiedCodeGen(CGF);
180         CodeGenFunction::JumpDest CurPoint =
181             CGF.getJumpDestInCurrentScope(".untied.next.");
182         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
183         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
184         UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
185                               CGF.Builder.GetInsertBlock());
186         CGF.EmitBranchThroughCleanup(CurPoint);
187         CGF.EmitBlock(CurPoint.getBlock());
188       }
189     }
190     unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
191   };
192   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
193                                  const VarDecl *ThreadIDVar,
194                                  const RegionCodeGenTy &CodeGen,
195                                  OpenMPDirectiveKind Kind, bool HasCancel,
196                                  const UntiedTaskActionTy &Action)
197       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
198         ThreadIDVar(ThreadIDVar), Action(Action) {
199     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
200   }
201 
202   /// Get a variable or parameter for storing global thread id
203   /// inside OpenMP construct.
204   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
205 
206   /// Get an LValue for the current ThreadID variable.
207   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
208 
209   /// Get the name of the capture helper.
210   StringRef getHelperName() const override { return ".omp_outlined."; }
211 
212   void emitUntiedSwitch(CodeGenFunction &CGF) override {
213     Action.emitUntiedSwitch(CGF);
214   }
215 
216   static bool classof(const CGCapturedStmtInfo *Info) {
217     return CGOpenMPRegionInfo::classof(Info) &&
218            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
219                TaskOutlinedRegion;
220   }
221 
222 private:
223   /// A variable or parameter storing global thread id for OpenMP
224   /// constructs.
225   const VarDecl *ThreadIDVar;
226   /// Action for emitting code for untied tasks.
227   const UntiedTaskActionTy &Action;
228 };
229 
230 /// API for inlined captured statement code generation in OpenMP
231 /// constructs.
232 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
233 public:
234   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
235                             const RegionCodeGenTy &CodeGen,
236                             OpenMPDirectiveKind Kind, bool HasCancel)
237       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
238         OldCSI(OldCSI),
239         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
240 
241   // Retrieve the value of the context parameter.
242   llvm::Value *getContextValue() const override {
243     if (OuterRegionInfo)
244       return OuterRegionInfo->getContextValue();
245     llvm_unreachable("No context value for inlined OpenMP region");
246   }
247 
248   void setContextValue(llvm::Value *V) override {
249     if (OuterRegionInfo) {
250       OuterRegionInfo->setContextValue(V);
251       return;
252     }
253     llvm_unreachable("No context value for inlined OpenMP region");
254   }
255 
256   /// Lookup the captured field decl for a variable.
257   const FieldDecl *lookup(const VarDecl *VD) const override {
258     if (OuterRegionInfo)
259       return OuterRegionInfo->lookup(VD);
260     // If there is no outer outlined region,no need to lookup in a list of
261     // captured variables, we can use the original one.
262     return nullptr;
263   }
264 
265   FieldDecl *getThisFieldDecl() const override {
266     if (OuterRegionInfo)
267       return OuterRegionInfo->getThisFieldDecl();
268     return nullptr;
269   }
270 
271   /// Get a variable or parameter for storing global thread id
272   /// inside OpenMP construct.
273   const VarDecl *getThreadIDVariable() const override {
274     if (OuterRegionInfo)
275       return OuterRegionInfo->getThreadIDVariable();
276     return nullptr;
277   }
278 
279   /// Get an LValue for the current ThreadID variable.
280   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
281     if (OuterRegionInfo)
282       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
283     llvm_unreachable("No LValue for inlined OpenMP construct");
284   }
285 
286   /// Get the name of the capture helper.
287   StringRef getHelperName() const override {
288     if (auto *OuterRegionInfo = getOldCSI())
289       return OuterRegionInfo->getHelperName();
290     llvm_unreachable("No helper name for inlined OpenMP construct");
291   }
292 
293   void emitUntiedSwitch(CodeGenFunction &CGF) override {
294     if (OuterRegionInfo)
295       OuterRegionInfo->emitUntiedSwitch(CGF);
296   }
297 
298   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
299 
300   static bool classof(const CGCapturedStmtInfo *Info) {
301     return CGOpenMPRegionInfo::classof(Info) &&
302            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
303   }
304 
305   ~CGOpenMPInlinedRegionInfo() override = default;
306 
307 private:
308   /// CodeGen info about outer OpenMP region.
309   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
310   CGOpenMPRegionInfo *OuterRegionInfo;
311 };
312 
313 /// API for captured statement code generation in OpenMP target
314 /// constructs. For this captures, implicit parameters are used instead of the
315 /// captured fields. The name of the target region has to be unique in a given
316 /// application so it is provided by the client, because only the client has
317 /// the information to generate that.
318 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
319 public:
320   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
321                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
322       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
323                            /*HasCancel=*/false),
324         HelperName(HelperName) {}
325 
326   /// This is unused for target regions because each starts executing
327   /// with a single thread.
328   const VarDecl *getThreadIDVariable() const override { return nullptr; }
329 
330   /// Get the name of the capture helper.
331   StringRef getHelperName() const override { return HelperName; }
332 
333   static bool classof(const CGCapturedStmtInfo *Info) {
334     return CGOpenMPRegionInfo::classof(Info) &&
335            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
336   }
337 
338 private:
339   StringRef HelperName;
340 };
341 
342 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
343   llvm_unreachable("No codegen for expressions");
344 }
345 /// API for generation of expressions captured in a innermost OpenMP
346 /// region.
347 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
348 public:
349   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
350       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
351                                   OMPD_unknown,
352                                   /*HasCancel=*/false),
353         PrivScope(CGF) {
354     // Make sure the globals captured in the provided statement are local by
355     // using the privatization logic. We assume the same variable is not
356     // captured more than once.
357     for (const auto &C : CS.captures()) {
358       if (!C.capturesVariable() && !C.capturesVariableByCopy())
359         continue;
360 
361       const VarDecl *VD = C.getCapturedVar();
362       if (VD->isLocalVarDeclOrParm())
363         continue;
364 
365       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
366                       /*RefersToEnclosingVariableOrCapture=*/false,
367                       VD->getType().getNonReferenceType(), VK_LValue,
368                       C.getLocation());
369       PrivScope.addPrivate(
370           VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
371     }
372     (void)PrivScope.Privatize();
373   }
374 
375   /// Lookup the captured field decl for a variable.
376   const FieldDecl *lookup(const VarDecl *VD) const override {
377     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
378       return FD;
379     return nullptr;
380   }
381 
382   /// Emit the captured statement body.
383   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
384     llvm_unreachable("No body for expressions");
385   }
386 
387   /// Get a variable or parameter for storing global thread id
388   /// inside OpenMP construct.
389   const VarDecl *getThreadIDVariable() const override {
390     llvm_unreachable("No thread id for expressions");
391   }
392 
393   /// Get the name of the capture helper.
394   StringRef getHelperName() const override {
395     llvm_unreachable("No helper name for expressions");
396   }
397 
398   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
399 
400 private:
401   /// Private scope to capture global variables.
402   CodeGenFunction::OMPPrivateScope PrivScope;
403 };
404 
405 /// RAII for emitting code of OpenMP constructs.
406 class InlinedOpenMPRegionRAII {
407   CodeGenFunction &CGF;
408   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
409   FieldDecl *LambdaThisCaptureField = nullptr;
410   const CodeGen::CGBlockInfo *BlockInfo = nullptr;
411 
412 public:
413   /// Constructs region for combined constructs.
414   /// \param CodeGen Code generation sequence for combined directives. Includes
415   /// a list of functions used for code generation of implicitly inlined
416   /// regions.
417   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
418                           OpenMPDirectiveKind Kind, bool HasCancel)
419       : CGF(CGF) {
420     // Start emission for the construct.
421     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
422         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
423     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
424     LambdaThisCaptureField = CGF.LambdaThisCaptureField;
425     CGF.LambdaThisCaptureField = nullptr;
426     BlockInfo = CGF.BlockInfo;
427     CGF.BlockInfo = nullptr;
428   }
429 
430   ~InlinedOpenMPRegionRAII() {
431     // Restore original CapturedStmtInfo only if we're done with code emission.
432     auto *OldCSI =
433         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
434     delete CGF.CapturedStmtInfo;
435     CGF.CapturedStmtInfo = OldCSI;
436     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
437     CGF.LambdaThisCaptureField = LambdaThisCaptureField;
438     CGF.BlockInfo = BlockInfo;
439   }
440 };
441 
442 /// Values for bit flags used in the ident_t to describe the fields.
443 /// All enumeric elements are named and described in accordance with the code
444 /// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
445 enum OpenMPLocationFlags : unsigned {
446   /// Use trampoline for internal microtask.
447   OMP_IDENT_IMD = 0x01,
448   /// Use c-style ident structure.
449   OMP_IDENT_KMPC = 0x02,
450   /// Atomic reduction option for kmpc_reduce.
451   OMP_ATOMIC_REDUCE = 0x10,
452   /// Explicit 'barrier' directive.
453   OMP_IDENT_BARRIER_EXPL = 0x20,
454   /// Implicit barrier in code.
455   OMP_IDENT_BARRIER_IMPL = 0x40,
456   /// Implicit barrier in 'for' directive.
457   OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
458   /// Implicit barrier in 'sections' directive.
459   OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
460   /// Implicit barrier in 'single' directive.
461   OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
462   /// Call of __kmp_for_static_init for static loop.
463   OMP_IDENT_WORK_LOOP = 0x200,
464   /// Call of __kmp_for_static_init for sections.
465   OMP_IDENT_WORK_SECTIONS = 0x400,
466   /// Call of __kmp_for_static_init for distribute.
467   OMP_IDENT_WORK_DISTRIBUTE = 0x800,
468   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
469 };
470 
471 namespace {
472 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
473 /// Values for bit flags for marking which requires clauses have been used.
474 enum OpenMPOffloadingRequiresDirFlags : int64_t {
475   /// flag undefined.
476   OMP_REQ_UNDEFINED               = 0x000,
477   /// no requires clause present.
478   OMP_REQ_NONE                    = 0x001,
479   /// reverse_offload clause.
480   OMP_REQ_REVERSE_OFFLOAD         = 0x002,
481   /// unified_address clause.
482   OMP_REQ_UNIFIED_ADDRESS         = 0x004,
483   /// unified_shared_memory clause.
484   OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
485   /// dynamic_allocators clause.
486   OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
487   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
488 };
489 
490 enum OpenMPOffloadingReservedDeviceIDs {
491   /// Device ID if the device was not defined, runtime should get it
492   /// from environment variables in the spec.
493   OMP_DEVICEID_UNDEF = -1,
494 };
495 } // anonymous namespace
496 
/// Field indices of the ident structure, which describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// Might be used in Fortran.
  IdentField_Reserved_1 = 0,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more.
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++.
  IdentField_Reserved_3,
  /// String describing the source location. It consists of semi-colon
  /// separated fields naming the source file, the function and a pair of line
  /// numbers that delimit the construct.
  IdentField_PSource,
};
537 
/// Schedule kinds of 'omp for' loops. The enumerators are taken from the enum
/// sched_type in kmp.h.
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// Static with chunk adjustment (e.g., simd).
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  /// The default schedule is the plain static one.
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types.
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Bit set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = 1 << 29,
  /// Bit set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = 1 << 30,
};
569 
570 enum OpenMPRTLFunction {
571   /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
572   /// kmpc_micro microtask, ...);
573   OMPRTL__kmpc_fork_call,
574   /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
575   /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
576   OMPRTL__kmpc_threadprivate_cached,
577   /// Call to void __kmpc_threadprivate_register( ident_t *,
578   /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
579   OMPRTL__kmpc_threadprivate_register,
  // Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
581   OMPRTL__kmpc_global_thread_num,
582   // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
583   // kmp_critical_name *crit);
584   OMPRTL__kmpc_critical,
585   // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
586   // global_tid, kmp_critical_name *crit, uintptr_t hint);
587   OMPRTL__kmpc_critical_with_hint,
588   // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
589   // kmp_critical_name *crit);
590   OMPRTL__kmpc_end_critical,
591   // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
592   // global_tid);
593   OMPRTL__kmpc_cancel_barrier,
594   // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
595   OMPRTL__kmpc_barrier,
596   // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
597   OMPRTL__kmpc_for_static_fini,
598   // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
599   // global_tid);
600   OMPRTL__kmpc_serialized_parallel,
601   // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
602   // global_tid);
603   OMPRTL__kmpc_end_serialized_parallel,
604   // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
605   // kmp_int32 num_threads);
606   OMPRTL__kmpc_push_num_threads,
607   // Call to void __kmpc_flush(ident_t *loc);
608   OMPRTL__kmpc_flush,
609   // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
610   OMPRTL__kmpc_master,
611   // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
612   OMPRTL__kmpc_end_master,
613   // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
614   // int end_part);
615   OMPRTL__kmpc_omp_taskyield,
616   // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
617   OMPRTL__kmpc_single,
618   // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
619   OMPRTL__kmpc_end_single,
620   // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
621   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
622   // kmp_routine_entry_t *task_entry);
623   OMPRTL__kmpc_omp_task_alloc,
624   // Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *,
625   // kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
626   // size_t sizeof_shareds, kmp_routine_entry_t *task_entry,
627   // kmp_int64 device_id);
628   OMPRTL__kmpc_omp_target_task_alloc,
629   // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
630   // new_task);
631   OMPRTL__kmpc_omp_task,
632   // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
633   // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
634   // kmp_int32 didit);
635   OMPRTL__kmpc_copyprivate,
636   // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
637   // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
638   // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
639   OMPRTL__kmpc_reduce,
640   // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
641   // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
642   // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
643   // *lck);
644   OMPRTL__kmpc_reduce_nowait,
645   // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
646   // kmp_critical_name *lck);
647   OMPRTL__kmpc_end_reduce,
648   // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
649   // kmp_critical_name *lck);
650   OMPRTL__kmpc_end_reduce_nowait,
651   // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
652   // kmp_task_t * new_task);
653   OMPRTL__kmpc_omp_task_begin_if0,
654   // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
655   // kmp_task_t * new_task);
656   OMPRTL__kmpc_omp_task_complete_if0,
657   // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
658   OMPRTL__kmpc_ordered,
659   // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
660   OMPRTL__kmpc_end_ordered,
661   // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
662   // global_tid);
663   OMPRTL__kmpc_omp_taskwait,
664   // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
665   OMPRTL__kmpc_taskgroup,
666   // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
667   OMPRTL__kmpc_end_taskgroup,
668   // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
669   // int proc_bind);
670   OMPRTL__kmpc_push_proc_bind,
671   // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
672   // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
673   // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
674   OMPRTL__kmpc_omp_task_with_deps,
675   // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
676   // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
677   // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
678   OMPRTL__kmpc_omp_wait_deps,
679   // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
680   // global_tid, kmp_int32 cncl_kind);
681   OMPRTL__kmpc_cancellationpoint,
682   // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
683   // kmp_int32 cncl_kind);
684   OMPRTL__kmpc_cancel,
685   // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
686   // kmp_int32 num_teams, kmp_int32 thread_limit);
687   OMPRTL__kmpc_push_num_teams,
688   // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
689   // microtask, ...);
690   OMPRTL__kmpc_fork_teams,
691   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
692   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
693   // sched, kmp_uint64 grainsize, void *task_dup);
694   OMPRTL__kmpc_taskloop,
695   // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
696   // num_dims, struct kmp_dim *dims);
697   OMPRTL__kmpc_doacross_init,
698   // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
699   OMPRTL__kmpc_doacross_fini,
700   // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
701   // *vec);
702   OMPRTL__kmpc_doacross_post,
703   // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
704   // *vec);
705   OMPRTL__kmpc_doacross_wait,
706   // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
707   // *data);
708   OMPRTL__kmpc_task_reduction_init,
709   // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
710   // *d);
711   OMPRTL__kmpc_task_reduction_get_th_data,
712   // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
713   OMPRTL__kmpc_alloc,
714   // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
715   OMPRTL__kmpc_free,
716 
717   //
718   // Offloading related calls
719   //
720   // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
721   // size);
722   OMPRTL__kmpc_push_target_tripcount,
723   // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
724   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
725   // *arg_types);
726   OMPRTL__tgt_target,
727   // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
728   // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
729   // *arg_types);
730   OMPRTL__tgt_target_nowait,
731   // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
732   // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
733   // *arg_types, int32_t num_teams, int32_t thread_limit);
734   OMPRTL__tgt_target_teams,
735   // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
736   // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
737   // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
738   OMPRTL__tgt_target_teams_nowait,
739   // Call to void __tgt_register_requires(int64_t flags);
740   OMPRTL__tgt_register_requires,
741   // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
742   // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
743   OMPRTL__tgt_target_data_begin,
744   // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
745   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
746   // *arg_types);
747   OMPRTL__tgt_target_data_begin_nowait,
  // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
750   OMPRTL__tgt_target_data_end,
751   // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
752   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
753   // *arg_types);
754   OMPRTL__tgt_target_data_end_nowait,
755   // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
756   // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
757   OMPRTL__tgt_target_data_update,
758   // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
759   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
760   // *arg_types);
761   OMPRTL__tgt_target_data_update_nowait,
762   // Call to int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
763   OMPRTL__tgt_mapper_num_components,
764   // Call to void __tgt_push_mapper_component(void *rt_mapper_handle, void
765   // *base, void *begin, int64_t size, int64_t type);
766   OMPRTL__tgt_push_mapper_component,
767 };
768 
769 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
770 /// region.
771 class CleanupTy final : public EHScopeStack::Cleanup {
772   PrePostActionTy *Action;
773 
774 public:
775   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
776   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
777     if (!CGF.HaveInsertPoint())
778       return;
779     Action->Exit(CGF);
780   }
781 };
782 
783 } // anonymous namespace
784 
785 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
786   CodeGenFunction::RunCleanupsScope Scope(CGF);
787   if (PrePostAction) {
788     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
789     Callback(CodeGen, CGF, *PrePostAction);
790   } else {
791     PrePostActionTy Action;
792     Callback(CodeGen, CGF, Action);
793   }
794 }
795 
796 /// Check if the combiner is a call to UDR combiner and if it is so return the
797 /// UDR decl used for reduction.
798 static const OMPDeclareReductionDecl *
799 getReductionInit(const Expr *ReductionOp) {
800   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
801     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
802       if (const auto *DRE =
803               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
804         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
805           return DRD;
806   return nullptr;
807 }
808 
/// Emit the initialization of a single private reduction element.
///
/// If \p DRD has an initializer, \p InitOp is emitted with the variables of
/// its two arguments remapped to \p Private and \p Original and with its
/// callee bound to the initializer function generated for \p DRD. Otherwise
/// \p Private is initialized from a constant global holding the null value of
/// \p Ty.
/// \param DRD User-defined reduction declaration. It is dereferenced
/// unconditionally, so it must not be null.
/// \param InitOp Initialization call expression; only inspected when \p DRD
/// has an initializer.
/// \param Private Address of the private element being initialized.
/// \param Original Address of the original element.
/// \param Ty Type of the element being initialized.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // Pair of functions generated for DRD; the second one is the initializer.
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    // InitOp is expected to be a call through an opaque callee whose two
    // arguments are unary operators applied to variable references; the
    // cast<> calls below assert that shape.
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Redirect the variable of the first argument to the private element and
    // the variable of the second argument to the original element.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // Bind the opaque callee to the initializer function and emit the call,
    // discarding its result.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No initializer: materialize the null value of Ty in a private constant
    // global and use it as the initial value of the private element.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    // Form an rvalue of the global that matches the evaluation kind of Ty.
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress(CGF));
      break;
    }
    // Wrap the rvalue in a temporary opaque expression so that it can be
    // emitted into the private element through the generic expression path.
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
860 
/// Emit element-by-element initialization of an array.
///
/// A loop over the destination array is emitted; each element is initialized
/// either through the declare reduction initializer or by emitting \p Init
/// into it.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, every element is initialized via
/// emitInitWithReductionInitializer using \p DRD; otherwise \p Init is emitted
/// into every element.
/// \param Init Initial expression of array.
/// \param DRD User-defined reduction declaration, may be null. When non-null
/// the source array is walked in lockstep with the destination array.
/// \param SrcAddr Address of the original array. Only used when \p DRD is
/// non-null.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  // NOTE(review): emitArrayLength presumably also updates ElementTy and
  // DestAddr to refer to the base element - confirm against its declaration.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  // Give the source pointer the same element type as the destination.
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Compute the pointer past the last destination element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The loop is guarded by an emptiness check in front of it and tests the
  // end condition at the bottom of the body.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  // EntryBB is remembered as the first incoming block of the PHI nodes.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // The source element pointer is only tracked when a declare reduction is
  // involved.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit the initialization of the current element. The nested scope makes
  // cleanups created by the initializer run once per element.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the source address forward by one element.
    // NOTE(review): the value name says "dest" although this is the source
    // pointer; it looks like a copy-paste. Confirm that no test matches the
    // name literally before renaming it.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the destination address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // The back edge comes from the block that is current after the element
  // initialization was emitted, which need not be BodyBB itself.
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
949 
/// Emits the lvalue of the shared reduction item expression \p E by
/// forwarding to CodeGenFunction::EmitOMPSharedLValue.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}
953 
954 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
955                                             const Expr *E) {
956   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
957     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
958   return LValue();
959 }
960 
961 void ReductionCodeGen::emitAggregateInitialization(
962     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
963     const OMPDeclareReductionDecl *DRD) {
964   // Emit VarDecl with copy init for arrays.
965   // Get the address of the original variable captured in current
966   // captured region.
967   const auto *PrivateVD =
968       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
969   bool EmitDeclareReductionInit =
970       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
971   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
972                        EmitDeclareReductionInit,
973                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
974                                                 : PrivateVD->getInit(),
975                        DRD, SharedLVal.getAddress(CGF));
976 }
977 
978 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
979                                    ArrayRef<const Expr *> Privates,
980                                    ArrayRef<const Expr *> ReductionOps) {
981   ClausesData.reserve(Shareds.size());
982   SharedAddresses.reserve(Shareds.size());
983   Sizes.reserve(Shareds.size());
984   BaseDecls.reserve(Shareds.size());
985   auto IPriv = Privates.begin();
986   auto IRed = ReductionOps.begin();
987   for (const Expr *Ref : Shareds) {
988     ClausesData.emplace_back(Ref, *IPriv, *IRed);
989     std::advance(IPriv, 1);
990     std::advance(IRed, 1);
991   }
992 }
993 
994 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
995   assert(SharedAddresses.size() == N &&
996          "Number of generated lvalues must be exactly N.");
997   LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
998   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
999   SharedAddresses.emplace_back(First, Second);
1000 }
1001 
/// Computes and records the size of reduction item \p N and, when the private
/// type is variably modified, emits that type with its size expression bound
/// to the computed number of elements.
///
/// An entry (size in chars, number of elements) is appended to Sizes; the
/// number of elements is null for types that are not variably modified.
/// SharedAddresses[N] must already have been emitted.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  // Not variably modified: only the size of the shared item's type is
  // recorded.
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(
            SharedAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  // Element type is the pointee type of the shared item's pointer.
  auto *ElemType = cast<llvm::PointerType>(
                       SharedAddresses[N].first.getPointer(CGF)->getType())
                       ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Number of elements = (upper bound pointer - lower bound pointer) + 1,
    // i.e. the upper-bound lvalue refers to the last element of the section.
    Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(CGF),
                                     SharedAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Number of elements = size of the whole item / size of one element.
    SizeInChars = CGF.getTypeSize(
        SharedAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the size expression of the private variable array type to the
  // computed number of elements while the type is emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
1039 
1040 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
1041                                          llvm::Value *Size) {
1042   const auto *PrivateVD =
1043       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1044   QualType PrivateType = PrivateVD->getType();
1045   if (!PrivateType->isVariablyModifiedType()) {
1046     assert(!Size && !Sizes[N].second &&
1047            "Size should be nullptr for non-variably modified reduction "
1048            "items.");
1049     return;
1050   }
1051   CodeGenFunction::OpaqueValueMapping OpaqueMap(
1052       CGF,
1053       cast<OpaqueValueExpr>(
1054           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
1055       RValue::get(Size));
1056   CGF.EmitVariablyModifiedType(PrivateType);
1057 }
1058 
1059 void ReductionCodeGen::emitInitialization(
1060     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
1061     llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
1062   assert(SharedAddresses.size() > N && "No variable was generated");
1063   const auto *PrivateVD =
1064       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1065   const OMPDeclareReductionDecl *DRD =
1066       getReductionInit(ClausesData[N].ReductionOp);
1067   QualType PrivateType = PrivateVD->getType();
1068   PrivateAddr = CGF.Builder.CreateElementBitCast(
1069       PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1070   QualType SharedType = SharedAddresses[N].first.getType();
1071   SharedLVal = CGF.MakeAddrLValue(
1072       CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
1073                                        CGF.ConvertTypeForMem(SharedType)),
1074       SharedType, SharedAddresses[N].first.getBaseInfo(),
1075       CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
1076   if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
1077     emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
1078   } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
1079     emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
1080                                      PrivateAddr, SharedLVal.getAddress(CGF),
1081                                      SharedLVal.getType());
1082   } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
1083              !CGF.isTrivialInitializer(PrivateVD->getInit())) {
1084     CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
1085                          PrivateVD->getType().getQualifiers(),
1086                          /*IsInitializer=*/false);
1087   }
1088 }
1089 
1090 bool ReductionCodeGen::needCleanups(unsigned N) {
1091   const auto *PrivateVD =
1092       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1093   QualType PrivateType = PrivateVD->getType();
1094   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1095   return DTorKind != QualType::DK_none;
1096 }
1097 
1098 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
1099                                     Address PrivateAddr) {
1100   const auto *PrivateVD =
1101       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1102   QualType PrivateType = PrivateVD->getType();
1103   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1104   if (needCleanups(N)) {
1105     PrivateAddr = CGF.Builder.CreateElementBitCast(
1106         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1107     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
1108   }
1109 }
1110 
1111 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
1112                           LValue BaseLV) {
1113   BaseTy = BaseTy.getNonReferenceType();
1114   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1115          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
1116     if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
1117       BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
1118     } else {
1119       LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
1120       BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
1121     }
1122     BaseTy = BaseTy->getPointeeType();
1123   }
1124   return CGF.MakeAddrLValue(
1125       CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
1126                                        CGF.ConvertTypeForMem(ElTy)),
1127       BaseLV.getType(), BaseLV.getBaseInfo(),
1128       CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
1129 }
1130 
/// Wraps \p Addr into as many levels of indirection as \p BaseTy has pointer
/// or reference levels above \p ElTy.
///
/// For every such level a memory temporary is created and each temporary is
/// made to point to the next one; \p Addr is stored into the innermost
/// temporary and the outermost temporary is returned. If there is no such
/// level, \p Addr is cast to \p BaseLVType and returned with alignment
/// \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  // Tmp is the most recently created temporary, TopTmp the one created before
  // it, and MostTopTmp the very first (outermost) one.
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    // Link the previous level to the new one, or remember the first level.
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  // Cast the address to what the innermost temporary stores or, without any
  // temporary, to the requested base lvalue type.
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
1158 
1159 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
1160   const VarDecl *OrigVD = nullptr;
1161   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
1162     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
1163     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
1164       Base = TempOASE->getBase()->IgnoreParenImpCasts();
1165     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1166       Base = TempASE->getBase()->IgnoreParenImpCasts();
1167     DE = cast<DeclRefExpr>(Base);
1168     OrigVD = cast<VarDecl>(DE->getDecl());
1169   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
1170     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
1171     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1172       Base = TempASE->getBase()->IgnoreParenImpCasts();
1173     DE = cast<DeclRefExpr>(Base);
1174     OrigVD = cast<VarDecl>(DE->getDecl());
1175   }
1176   return OrigVD;
1177 }
1178 
1179 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
1180                                                Address PrivateAddr) {
1181   const DeclRefExpr *DE;
1182   if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
1183     BaseDecls.emplace_back(OrigVD);
1184     LValue OriginalBaseLValue = CGF.EmitLValue(DE);
1185     LValue BaseLValue =
1186         loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1187                     OriginalBaseLValue);
1188     llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1189         BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
1190     llvm::Value *PrivatePointer =
1191         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1192             PrivateAddr.getPointer(),
1193             SharedAddresses[N].first.getAddress(CGF).getType());
1194     llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
1195     return castToBase(CGF, OrigVD->getType(),
1196                       SharedAddresses[N].first.getType(),
1197                       OriginalBaseLValue.getAddress(CGF).getType(),
1198                       OriginalBaseLValue.getAlignment(), Ptr);
1199   }
1200   BaseDecls.emplace_back(
1201       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1202   return PrivateAddr;
1203 }
1204 
1205 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1206   const OMPDeclareReductionDecl *DRD =
1207       getReductionInit(ClausesData[N].ReductionOp);
1208   return DRD && DRD->getInitializer();
1209 }
1210 
1211 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1212   return CGF.EmitLoadOfPointerLValue(
1213       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1214       getThreadIDVariable()->getType()->castAs<PointerType>());
1215 }
1216 
1217 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
1218   if (!CGF.HaveInsertPoint())
1219     return;
1220   // 1.2.2 OpenMP Language Terminology
1221   // Structured block - An executable statement with a single entry at the
1222   // top and a single exit at the bottom.
1223   // The point of exit cannot be a branch out of the structured block.
1224   // longjmp() and throw() must not violate the entry/exit criteria.
1225   CGF.EHStack.pushTerminate();
1226   CodeGen(CGF);
1227   CGF.EHStack.popTerminate();
1228 }
1229 
1230 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1231     CodeGenFunction &CGF) {
1232   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1233                             getThreadIDVariable()->getType(),
1234                             AlignmentSource::Decl);
1235 }
1236 
1237 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1238                                        QualType FieldTy) {
1239   auto *Field = FieldDecl::Create(
1240       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1241       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1242       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1243   Field->setAccess(AS_public);
1244   DC->addDecl(Field);
1245   return Field;
1246 }
1247 
1248 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
1249                                  StringRef Separator)
1250     : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
1251       OffloadEntriesInfoManager(CGM) {
1252   ASTContext &C = CGM.getContext();
1253   RecordDecl *RD = C.buildImplicitRecord("ident_t");
1254   QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1255   RD->startDefinition();
1256   // reserved_1
1257   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1258   // flags
1259   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1260   // reserved_2
1261   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1262   // reserved_3
1263   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1264   // psource
1265   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
1266   RD->completeDefinition();
1267   IdentQTy = C.getRecordType(RD);
1268   IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
1269   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1270 
1271   loadOffloadInfoMetadata();
1272 }
1273 
/// Tries to emit the function \p OldGD as an alias of the function \p NewGD.
///
/// The alias can only be created when the module already contains a
/// definition under the mangled name of \p NewGD.
/// \param NewGD Function that becomes the aliasee.
/// \param OldGD Function whose mangled name the alias gets.
/// \param OrigAddr Existing global for \p OldGD, or null. If given, it has to
/// be a declaration; it is replaced by the alias and erased.
/// \param IsForDefinition If true, the address of \p NewGD is requested first
/// (the same happens whenever \p OrigAddr is non-null).
/// \returns true if the alias was created, false otherwise.
bool CGOpenMPRuntime::tryEmitDeclareVariant(const GlobalDecl &NewGD,
                                            const GlobalDecl &OldGD,
                                            llvm::GlobalValue *OrigAddr,
                                            bool IsForDefinition) {
  // Emit at least a definition for the aliasee if the address of the
  // original function is requested.
  if (IsForDefinition || OrigAddr)
    (void)CGM.GetAddrOfGlobal(NewGD);
  StringRef NewMangledName = CGM.getMangledName(NewGD);
  llvm::GlobalValue *Addr = CGM.GetGlobalValue(NewMangledName);
  if (Addr && !Addr->isDeclaration()) {
    const auto *D = cast<FunctionDecl>(OldGD.getDecl());
    const CGFunctionInfo &FI = CGM.getTypes().arrangeGlobalDeclaration(NewGD);
    llvm::Type *DeclTy = CGM.getTypes().GetFunctionType(FI);

    // The alias is created with the linkage computed for the original
    // function.
    llvm::GlobalValue::LinkageTypes LT = CGM.getFunctionLinkage(OldGD);

    // Create the new alias itself, but don't set a name yet.
    auto *GA =
        llvm::GlobalAlias::create(DeclTy, 0, LT, "", Addr, &CGM.getModule());

    if (OrigAddr) {
      assert(OrigAddr->isDeclaration() && "Expected declaration");

      // Take over the name and all uses of the existing declaration, then
      // drop the declaration.
      GA->takeName(OrigAddr);
      OrigAddr->replaceAllUsesWith(
          llvm::ConstantExpr::getBitCast(GA, OrigAddr->getType()));
      OrigAddr->eraseFromParent();
    } else {
      GA->setName(CGM.getMangledName(OldGD));
    }

    // Set attributes which are particular to an alias; this is a
    // specialization of the attributes which may be set on a global function.
    if (D->hasAttr<WeakAttr>() || D->hasAttr<WeakRefAttr>() ||
        D->isWeakImported())
      GA->setLinkage(llvm::Function::WeakAnyLinkage);

    CGM.SetCommonAttributes(OldGD, GA);
    return true;
  }
  return false;
}
1319 
1320 void CGOpenMPRuntime::clear() {
1321   InternalVars.clear();
1322   // Clean non-target variable declarations possibly used only in debug info.
1323   for (const auto &Data : EmittedNonTargetVariables) {
1324     if (!Data.getValue().pointsToAliveValue())
1325       continue;
1326     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1327     if (!GV)
1328       continue;
1329     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1330       continue;
1331     GV->eraseFromParent();
1332   }
1333   // Emit aliases for the deferred aliasees.
1334   for (const auto &Pair : DeferredVariantFunction) {
1335     StringRef MangledName = CGM.getMangledName(Pair.second.second);
1336     llvm::GlobalValue *Addr = CGM.GetGlobalValue(MangledName);
1337     // If not able to emit alias, just emit original declaration.
1338     (void)tryEmitDeclareVariant(Pair.second.first, Pair.second.second, Addr,
1339                                 /*IsForDefinition=*/false);
1340   }
1341 }
1342 
1343 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1344   SmallString<128> Buffer;
1345   llvm::raw_svector_ostream OS(Buffer);
1346   StringRef Sep = FirstSeparator;
1347   for (StringRef Part : Parts) {
1348     OS << Sep << Part;
1349     Sep = Separator;
1350   }
1351   return std::string(OS.str());
1352 }
1353 
/// Emits an internal function that implements the combiner or the
/// initializer of a user-defined reduction.
///
/// The function returns void and takes two restrict-qualified pointers to
/// \p Ty: first the one standing for \p Out, then the one standing for \p In.
/// Inside the function \p In and \p Out are remapped to the pointees of the
/// corresponding parameters.
/// \param Ty Type of the reduction item.
/// \param CombinerInitializer Expression to emit in the body; may be null, in
/// which case no expression is emitted.
/// \param In Variable mapped to the pointee of the second parameter.
/// \param Out Variable mapped to the pointee of the first parameter.
/// \param IsCombiner True for a combiner. For an initializer, a non-trivial
/// initializer of \p Out is emitted into it before \p CombinerInitializer.
/// \returns The emitted function.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *out, Ty *in);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  // The out parameter comes first.
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // The trailing empty part makes the name end with a separator.
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // When optimizing, force the helper to be inlined.
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // For initializers, first emit a non-trivial initializer of Out into the
  // (remapped) Out variable.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1410 
1411 void CGOpenMPRuntime::emitUserDefinedReduction(
1412     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1413   if (UDRMap.count(D) > 0)
1414     return;
1415   llvm::Function *Combiner = emitCombinerOrInitializer(
1416       CGM, D->getType(), D->getCombiner(),
1417       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1418       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1419       /*IsCombiner=*/true);
1420   llvm::Function *Initializer = nullptr;
1421   if (const Expr *Init = D->getInitializer()) {
1422     Initializer = emitCombinerOrInitializer(
1423         CGM, D->getType(),
1424         D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1425                                                                      : nullptr,
1426         cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1427         cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1428         /*IsCombiner=*/false);
1429   }
1430   UDRMap.try_emplace(D, Combiner, Initializer);
1431   if (CGF) {
1432     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1433     Decls.second.push_back(D);
1434   }
1435 }
1436 
1437 std::pair<llvm::Function *, llvm::Function *>
1438 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1439   auto I = UDRMap.find(D);
1440   if (I != UDRMap.end())
1441     return I->second;
1442   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1443   return UDRMap.lookup(D);
1444 }
1445 
1446 namespace {
1447 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1448 // Builder if one is present.
1449 struct PushAndPopStackRAII {
1450   PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
1451                       bool HasCancel)
1452       : OMPBuilder(OMPBuilder) {
1453     if (!OMPBuilder)
1454       return;
1455 
1456     // The following callback is the crucial part of clangs cleanup process.
1457     //
1458     // NOTE:
1459     // Once the OpenMPIRBuilder is used to create parallel regions (and
1460     // similar), the cancellation destination (Dest below) is determined via
1461     // IP. That means if we have variables to finalize we split the block at IP,
1462     // use the new block (=BB) as destination to build a JumpDest (via
1463     // getJumpDestInCurrentScope(BB)) which then is fed to
1464     // EmitBranchThroughCleanup. Furthermore, there will not be the need
1465     // to push & pop an FinalizationInfo object.
1466     // The FiniCB will still be needed but at the point where the
1467     // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
1468     auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
1469       assert(IP.getBlock()->end() == IP.getPoint() &&
1470              "Clang CG should cause non-terminated block!");
1471       CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1472       CGF.Builder.restoreIP(IP);
1473       CodeGenFunction::JumpDest Dest =
1474           CGF.getOMPCancelDestination(OMPD_parallel);
1475       CGF.EmitBranchThroughCleanup(Dest);
1476     };
1477 
1478     // TODO: Remove this once we emit parallel regions through the
1479     //       OpenMPIRBuilder as it can do this setup internally.
1480     llvm::OpenMPIRBuilder::FinalizationInfo FI(
1481         {FiniCB, OMPD_parallel, HasCancel});
1482     OMPBuilder->pushFinalizationCB(std::move(FI));
1483   }
1484   ~PushAndPopStackRAII() {
1485     if (OMPBuilder)
1486       OMPBuilder->popFinalizationCB();
1487   }
1488   llvm::OpenMPIRBuilder *OMPBuilder;
1489 };
1490 } // namespace
1491 
1492 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1493     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1494     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1495     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1496   assert(ThreadIDVar->getType()->isPointerType() &&
1497          "thread id variable must be of type kmp_int32 *");
1498   CodeGenFunction CGF(CGM, true);
1499   bool HasCancel = false;
1500   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1501     HasCancel = OPD->hasCancel();
1502   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1503     HasCancel = OPSD->hasCancel();
1504   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1505     HasCancel = OPFD->hasCancel();
1506   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1507     HasCancel = OPFD->hasCancel();
1508   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1509     HasCancel = OPFD->hasCancel();
1510   else if (const auto *OPFD =
1511                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1512     HasCancel = OPFD->hasCancel();
1513   else if (const auto *OPFD =
1514                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1515     HasCancel = OPFD->hasCancel();
1516 
1517   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1518   //       parallel region to make cancellation barriers work properly.
1519   llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder();
1520   PushAndPopStackRAII PSR(OMPBuilder, CGF, HasCancel);
1521   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1522                                     HasCancel, OutlinedHelperName);
1523   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1524   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1525 }
1526 
1527 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1528     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1529     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1530   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1531   return emitParallelOrTeamsOutlinedFunction(
1532       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1533 }
1534 
1535 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1536     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1537     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1538   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1539   return emitParallelOrTeamsOutlinedFunction(
1540       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1541 }
1542 
1543 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
1544     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1545     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1546     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1547     bool Tied, unsigned &NumberOfParts) {
1548   auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1549                                               PrePostActionTy &) {
1550     llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1551     llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1552     llvm::Value *TaskArgs[] = {
1553         UpLoc, ThreadID,
1554         CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1555                                     TaskTVar->getType()->castAs<PointerType>())
1556             .getPointer(CGF)};
1557     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
1558   };
1559   CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1560                                                             UntiedCodeGen);
1561   CodeGen.setAction(Action);
1562   assert(!ThreadIDVar->getType()->isPointerType() &&
1563          "thread id variable must be of type kmp_int32 for tasks");
1564   const OpenMPDirectiveKind Region =
1565       isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1566                                                       : OMPD_task;
1567   const CapturedStmt *CS = D.getCapturedStmt(Region);
1568   bool HasCancel = false;
1569   if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
1570     HasCancel = TD->hasCancel();
1571   else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
1572     HasCancel = TD->hasCancel();
1573   else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
1574     HasCancel = TD->hasCancel();
1575   else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
1576     HasCancel = TD->hasCancel();
1577 
1578   CodeGenFunction CGF(CGM, true);
1579   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1580                                         InnermostKind, HasCancel, Action);
1581   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1582   llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
1583   if (!Tied)
1584     NumberOfParts = Action.getNumberOfParts();
1585   return Res;
1586 }
1587 
1588 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1589                              const RecordDecl *RD, const CGRecordLayout &RL,
1590                              ArrayRef<llvm::Constant *> Data) {
1591   llvm::StructType *StructTy = RL.getLLVMType();
1592   unsigned PrevIdx = 0;
1593   ConstantInitBuilder CIBuilder(CGM);
1594   auto DI = Data.begin();
1595   for (const FieldDecl *FD : RD->fields()) {
1596     unsigned Idx = RL.getLLVMFieldNo(FD);
1597     // Fill the alignment.
1598     for (unsigned I = PrevIdx; I < Idx; ++I)
1599       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1600     PrevIdx = Idx + 1;
1601     Fields.add(*DI);
1602     ++DI;
1603   }
1604 }
1605 
1606 template <class... As>
1607 static llvm::GlobalVariable *
1608 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1609                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1610                    As &&... Args) {
1611   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1612   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1613   ConstantInitBuilder CIBuilder(CGM);
1614   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1615   buildStructValue(Fields, CGM, RD, RL, Data);
1616   return Fields.finishAndCreateGlobal(
1617       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1618       std::forward<As>(Args)...);
1619 }
1620 
1621 template <typename T>
1622 static void
1623 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1624                                          ArrayRef<llvm::Constant *> Data,
1625                                          T &Parent) {
1626   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1627   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1628   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1629   buildStructValue(Fields, CGM, RD, RL, Data);
1630   Fields.finishAndAddTo(Parent);
1631 }
1632 
1633 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
1634   CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
1635   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1636   FlagsTy FlagsKey(Flags, Reserved2Flags);
1637   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
1638   if (!Entry) {
1639     if (!DefaultOpenMPPSource) {
1640       // Initialize default location for psource field of ident_t structure of
1641       // all ident_t objects. Format is ";file;function;line;column;;".
1642       // Taken from
1643       // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
1644       DefaultOpenMPPSource =
1645           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
1646       DefaultOpenMPPSource =
1647           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
1648     }
1649 
1650     llvm::Constant *Data[] = {
1651         llvm::ConstantInt::getNullValue(CGM.Int32Ty),
1652         llvm::ConstantInt::get(CGM.Int32Ty, Flags),
1653         llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
1654         llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
1655     llvm::GlobalValue *DefaultOpenMPLocation =
1656         createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
1657                            llvm::GlobalValue::PrivateLinkage);
1658     DefaultOpenMPLocation->setUnnamedAddr(
1659         llvm::GlobalValue::UnnamedAddr::Global);
1660 
1661     OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
1662   }
1663   return Address(Entry, Align);
1664 }
1665 
1666 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1667                                              bool AtCurrentPoint) {
1668   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1669   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1670 
1671   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1672   if (AtCurrentPoint) {
1673     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1674         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1675   } else {
1676     Elem.second.ServiceInsertPt =
1677         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1678     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1679   }
1680 }
1681 
1682 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1683   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1684   if (Elem.second.ServiceInsertPt) {
1685     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1686     Elem.second.ServiceInsertPt = nullptr;
1687     Ptr->eraseFromParent();
1688   }
1689 }
1690 
1691 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1692                                                  SourceLocation Loc,
1693                                                  unsigned Flags) {
1694   Flags |= OMP_IDENT_KMPC;
1695   // If no debug info is generated - return global default location.
1696   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1697       Loc.isInvalid())
1698     return getOrCreateDefaultLocation(Flags).getPointer();
1699 
1700   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1701 
1702   CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
1703   Address LocValue = Address::invalid();
1704   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1705   if (I != OpenMPLocThreadIDMap.end())
1706     LocValue = Address(I->second.DebugLoc, Align);
1707 
1708   // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
1709   // GetOpenMPThreadID was called before this routine.
1710   if (!LocValue.isValid()) {
1711     // Generate "ident_t .kmpc_loc.addr;"
1712     Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
1713     auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1714     Elem.second.DebugLoc = AI.getPointer();
1715     LocValue = AI;
1716 
1717     if (!Elem.second.ServiceInsertPt)
1718       setLocThreadIdInsertPt(CGF);
1719     CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1720     CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1721     CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
1722                              CGF.getTypeSize(IdentQTy));
1723   }
1724 
1725   // char **psource = &.kmpc_loc_<flags>.addr.psource;
1726   LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
1727   auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
1728   LValue PSource =
1729       CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));
1730 
1731   llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
1732   if (OMPDebugLoc == nullptr) {
1733     SmallString<128> Buffer2;
1734     llvm::raw_svector_ostream OS2(Buffer2);
1735     // Build debug location
1736     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1737     OS2 << ";" << PLoc.getFilename() << ";";
1738     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1739       OS2 << FD->getQualifiedNameAsString();
1740     OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1741     OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
1742     OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
1743   }
1744   // *psource = ";<File>;<Function>;<Line>;<Column>;;";
1745   CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);
1746 
1747   // Our callers always pass this to a runtime function, so for
1748   // convenience, go ahead and return a naked pointer.
1749   return LocValue.getPointer();
1750 }
1751 
1752 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
1753                                           SourceLocation Loc) {
1754   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1755 
1756   llvm::Value *ThreadID = nullptr;
1757   // Check whether we've already cached a load of the thread id in this
1758   // function.
1759   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1760   if (I != OpenMPLocThreadIDMap.end()) {
1761     ThreadID = I->second.ThreadID;
1762     if (ThreadID != nullptr)
1763       return ThreadID;
1764   }
1765   // If exceptions are enabled, do not use parameter to avoid possible crash.
1766   if (auto *OMPRegionInfo =
1767           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1768     if (OMPRegionInfo->getThreadIDVariable()) {
1769       // Check if this an outlined function with thread id passed as argument.
1770       LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1771       llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
1772       if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1773           !CGF.getLangOpts().CXXExceptions ||
1774           CGF.Builder.GetInsertBlock() == TopBlock ||
1775           !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
1776           cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1777               TopBlock ||
1778           cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1779               CGF.Builder.GetInsertBlock()) {
1780         ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1781         // If value loaded in entry block, cache it and use it everywhere in
1782         // function.
1783         if (CGF.Builder.GetInsertBlock() == TopBlock) {
1784           auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1785           Elem.second.ThreadID = ThreadID;
1786         }
1787         return ThreadID;
1788       }
1789     }
1790   }
1791 
1792   // This is not an outlined function region - need to call __kmpc_int32
1793   // kmpc_global_thread_num(ident_t *loc).
1794   // Generate thread id value and cache this value for use across the
1795   // function.
1796   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1797   if (!Elem.second.ServiceInsertPt)
1798     setLocThreadIdInsertPt(CGF);
1799   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1800   CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1801   llvm::CallInst *Call = CGF.Builder.CreateCall(
1802       createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
1803       emitUpdateLocation(CGF, Loc));
1804   Call->setCallingConv(CGF.getRuntimeCC());
1805   Elem.second.ThreadID = Call;
1806   return Call;
1807 }
1808 
1809 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1810   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1811   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1812     clearLocThreadIdInsertPt(CGF);
1813     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1814   }
1815   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1816     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1817       UDRMap.erase(D);
1818     FunctionUDRMap.erase(CGF.CurFn);
1819   }
1820   auto I = FunctionUDMMap.find(CGF.CurFn);
1821   if (I != FunctionUDMMap.end()) {
1822     for(const auto *D : I->second)
1823       UDMMap.erase(D);
1824     FunctionUDMMap.erase(I);
1825   }
1826   LastprivateConditionalToTypes.erase(CGF.CurFn);
1827 }
1828 
1829 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1830   return IdentTy->getPointerTo();
1831 }
1832 
1833 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1834   if (!Kmpc_MicroTy) {
1835     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1836     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1837                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1838     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1839   }
1840   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1841 }
1842 
1843 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1844   llvm::FunctionCallee RTLFn = nullptr;
1845   switch (static_cast<OpenMPRTLFunction>(Function)) {
1846   case OMPRTL__kmpc_fork_call: {
1847     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1848     // microtask, ...);
1849     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1850                                 getKmpc_MicroPointerTy()};
1851     auto *FnTy =
1852         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1853     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1854     if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
1855       if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
1856         llvm::LLVMContext &Ctx = F->getContext();
1857         llvm::MDBuilder MDB(Ctx);
1858         // Annotate the callback behavior of the __kmpc_fork_call:
1859         //  - The callback callee is argument number 2 (microtask).
1860         //  - The first two arguments of the callback callee are unknown (-1).
1861         //  - All variadic arguments to the __kmpc_fork_call are passed to the
1862         //    callback callee.
1863         F->addMetadata(
1864             llvm::LLVMContext::MD_callback,
1865             *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
1866                                         2, {-1, -1},
1867                                         /* VarArgsArePassed */ true)}));
1868       }
1869     }
1870     break;
1871   }
1872   case OMPRTL__kmpc_global_thread_num: {
1873     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1874     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1875     auto *FnTy =
1876         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1877     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1878     break;
1879   }
1880   case OMPRTL__kmpc_threadprivate_cached: {
1881     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1882     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1883     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1884                                 CGM.VoidPtrTy, CGM.SizeTy,
1885                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1886     auto *FnTy =
1887         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1888     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1889     break;
1890   }
1891   case OMPRTL__kmpc_critical: {
1892     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1893     // kmp_critical_name *crit);
1894     llvm::Type *TypeParams[] = {
1895         getIdentTyPointerTy(), CGM.Int32Ty,
1896         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1897     auto *FnTy =
1898         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1899     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1900     break;
1901   }
1902   case OMPRTL__kmpc_critical_with_hint: {
1903     // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1904     // kmp_critical_name *crit, uintptr_t hint);
1905     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1906                                 llvm::PointerType::getUnqual(KmpCriticalNameTy),
1907                                 CGM.IntPtrTy};
1908     auto *FnTy =
1909         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1910     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1911     break;
1912   }
1913   case OMPRTL__kmpc_threadprivate_register: {
1914     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1915     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1916     // typedef void *(*kmpc_ctor)(void *);
1917     auto *KmpcCtorTy =
1918         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1919                                 /*isVarArg*/ false)->getPointerTo();
1920     // typedef void *(*kmpc_cctor)(void *, void *);
1921     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1922     auto *KmpcCopyCtorTy =
1923         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1924                                 /*isVarArg*/ false)
1925             ->getPointerTo();
1926     // typedef void (*kmpc_dtor)(void *);
1927     auto *KmpcDtorTy =
1928         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1929             ->getPointerTo();
1930     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1931                               KmpcCopyCtorTy, KmpcDtorTy};
1932     auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1933                                         /*isVarArg*/ false);
1934     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1935     break;
1936   }
1937   case OMPRTL__kmpc_end_critical: {
1938     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1939     // kmp_critical_name *crit);
1940     llvm::Type *TypeParams[] = {
1941         getIdentTyPointerTy(), CGM.Int32Ty,
1942         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1943     auto *FnTy =
1944         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1945     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1946     break;
1947   }
1948   case OMPRTL__kmpc_cancel_barrier: {
1949     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1950     // global_tid);
1951     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1952     auto *FnTy =
1953         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1954     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1955     break;
1956   }
1957   case OMPRTL__kmpc_barrier: {
1958     // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1959     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1960     auto *FnTy =
1961         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1962     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1963     break;
1964   }
1965   case OMPRTL__kmpc_for_static_fini: {
1966     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1967     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1968     auto *FnTy =
1969         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1970     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1971     break;
1972   }
1973   case OMPRTL__kmpc_push_num_threads: {
1974     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1975     // kmp_int32 num_threads)
1976     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1977                                 CGM.Int32Ty};
1978     auto *FnTy =
1979         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1980     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1981     break;
1982   }
1983   case OMPRTL__kmpc_serialized_parallel: {
1984     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1985     // global_tid);
1986     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1987     auto *FnTy =
1988         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1989     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1990     break;
1991   }
1992   case OMPRTL__kmpc_end_serialized_parallel: {
1993     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1994     // global_tid);
1995     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1996     auto *FnTy =
1997         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1998     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1999     break;
2000   }
2001   case OMPRTL__kmpc_flush: {
2002     // Build void __kmpc_flush(ident_t *loc);
2003     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
2004     auto *FnTy =
2005         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2006     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
2007     break;
2008   }
2009   case OMPRTL__kmpc_master: {
2010     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
2011     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2012     auto *FnTy =
2013         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2014     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
2015     break;
2016   }
2017   case OMPRTL__kmpc_end_master: {
2018     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
2019     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2020     auto *FnTy =
2021         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2022     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
2023     break;
2024   }
2025   case OMPRTL__kmpc_omp_taskyield: {
2026     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
2027     // int end_part);
2028     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2029     auto *FnTy =
2030         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2031     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
2032     break;
2033   }
2034   case OMPRTL__kmpc_single: {
2035     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
2036     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2037     auto *FnTy =
2038         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2039     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
2040     break;
2041   }
2042   case OMPRTL__kmpc_end_single: {
2043     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
2044     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2045     auto *FnTy =
2046         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2047     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
2048     break;
2049   }
2050   case OMPRTL__kmpc_omp_task_alloc: {
2051     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
2052     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
2053     // kmp_routine_entry_t *task_entry);
2054     assert(KmpRoutineEntryPtrTy != nullptr &&
2055            "Type kmp_routine_entry_t must be created.");
2056     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2057                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
2058     // Return void * and then cast to particular kmp_task_t type.
2059     auto *FnTy =
2060         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2061     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
2062     break;
2063   }
2064   case OMPRTL__kmpc_omp_target_task_alloc: {
2065     // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32 gtid,
2066     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
2067     // kmp_routine_entry_t *task_entry, kmp_int64 device_id);
2068     assert(KmpRoutineEntryPtrTy != nullptr &&
2069            "Type kmp_routine_entry_t must be created.");
2070     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2071                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy,
2072                                 CGM.Int64Ty};
2073     // Return void * and then cast to particular kmp_task_t type.
2074     auto *FnTy =
2075         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2076     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_target_task_alloc");
2077     break;
2078   }
2079   case OMPRTL__kmpc_omp_task: {
2080     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2081     // *new_task);
2082     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2083                                 CGM.VoidPtrTy};
2084     auto *FnTy =
2085         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2086     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
2087     break;
2088   }
2089   case OMPRTL__kmpc_copyprivate: {
2090     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
2091     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
2092     // kmp_int32 didit);
2093     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2094     auto *CpyFnTy =
2095         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
2096     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
2097                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
2098                                 CGM.Int32Ty};
2099     auto *FnTy =
2100         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2101     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
2102     break;
2103   }
2104   case OMPRTL__kmpc_reduce: {
2105     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
2106     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
2107     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
2108     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2109     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
2110                                                /*isVarArg=*/false);
2111     llvm::Type *TypeParams[] = {
2112         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
2113         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
2114         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2115     auto *FnTy =
2116         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2117     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
2118     break;
2119   }
2120   case OMPRTL__kmpc_reduce_nowait: {
2121     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
2122     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
2123     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
2124     // *lck);
2125     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2126     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
2127                                                /*isVarArg=*/false);
2128     llvm::Type *TypeParams[] = {
2129         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
2130         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
2131         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2132     auto *FnTy =
2133         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2134     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
2135     break;
2136   }
2137   case OMPRTL__kmpc_end_reduce: {
2138     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
2139     // kmp_critical_name *lck);
2140     llvm::Type *TypeParams[] = {
2141         getIdentTyPointerTy(), CGM.Int32Ty,
2142         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2143     auto *FnTy =
2144         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2145     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
2146     break;
2147   }
2148   case OMPRTL__kmpc_end_reduce_nowait: {
    // Build void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *lck);
2151     llvm::Type *TypeParams[] = {
2152         getIdentTyPointerTy(), CGM.Int32Ty,
2153         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2154     auto *FnTy =
2155         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2156     RTLFn =
2157         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
2158     break;
2159   }
2160   case OMPRTL__kmpc_omp_task_begin_if0: {
    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
2163     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2164                                 CGM.VoidPtrTy};
2165     auto *FnTy =
2166         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2167     RTLFn =
2168         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
2169     break;
2170   }
2171   case OMPRTL__kmpc_omp_task_complete_if0: {
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
2174     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2175                                 CGM.VoidPtrTy};
2176     auto *FnTy =
2177         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2178     RTLFn = CGM.CreateRuntimeFunction(FnTy,
2179                                       /*Name=*/"__kmpc_omp_task_complete_if0");
2180     break;
2181   }
2182   case OMPRTL__kmpc_ordered: {
2183     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
2184     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2185     auto *FnTy =
2186         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2187     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
2188     break;
2189   }
2190   case OMPRTL__kmpc_end_ordered: {
2191     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
2192     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2193     auto *FnTy =
2194         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2195     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
2196     break;
2197   }
2198   case OMPRTL__kmpc_omp_taskwait: {
2199     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
2200     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2201     auto *FnTy =
2202         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2203     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
2204     break;
2205   }
2206   case OMPRTL__kmpc_taskgroup: {
2207     // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
2208     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2209     auto *FnTy =
2210         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2211     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
2212     break;
2213   }
2214   case OMPRTL__kmpc_end_taskgroup: {
2215     // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
2216     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2217     auto *FnTy =
2218         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2219     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
2220     break;
2221   }
2222   case OMPRTL__kmpc_push_proc_bind: {
2223     // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
2224     // int proc_bind)
2225     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2226     auto *FnTy =
2227         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2228     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
2229     break;
2230   }
2231   case OMPRTL__kmpc_omp_task_with_deps: {
2232     // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
2233     // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
2234     // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
2235     llvm::Type *TypeParams[] = {
2236         getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
2237         CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
2238     auto *FnTy =
2239         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2240     RTLFn =
2241         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
2242     break;
2243   }
2244   case OMPRTL__kmpc_omp_wait_deps: {
2245     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
2246     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
2247     // kmp_depend_info_t *noalias_dep_list);
2248     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2249                                 CGM.Int32Ty,           CGM.VoidPtrTy,
2250                                 CGM.Int32Ty,           CGM.VoidPtrTy};
2251     auto *FnTy =
2252         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2253     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
2254     break;
2255   }
2256   case OMPRTL__kmpc_cancellationpoint: {
2257     // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
2258     // global_tid, kmp_int32 cncl_kind)
2259     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2260     auto *FnTy =
2261         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2262     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
2263     break;
2264   }
2265   case OMPRTL__kmpc_cancel: {
2266     // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
2267     // kmp_int32 cncl_kind)
2268     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2269     auto *FnTy =
2270         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2271     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
2272     break;
2273   }
2274   case OMPRTL__kmpc_push_num_teams: {
    // Build void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
    // kmp_int32 num_teams, kmp_int32 num_threads)
2277     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2278         CGM.Int32Ty};
2279     auto *FnTy =
2280         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2281     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
2282     break;
2283   }
2284   case OMPRTL__kmpc_fork_teams: {
2285     // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
2286     // microtask, ...);
2287     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2288                                 getKmpc_MicroPointerTy()};
2289     auto *FnTy =
2290         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
2291     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
2292     if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
2293       if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
2294         llvm::LLVMContext &Ctx = F->getContext();
2295         llvm::MDBuilder MDB(Ctx);
2296         // Annotate the callback behavior of the __kmpc_fork_teams:
2297         //  - The callback callee is argument number 2 (microtask).
2298         //  - The first two arguments of the callback callee are unknown (-1).
2299         //  - All variadic arguments to the __kmpc_fork_teams are passed to the
2300         //    callback callee.
2301         F->addMetadata(
2302             llvm::LLVMContext::MD_callback,
2303             *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
2304                                         2, {-1, -1},
2305                                         /* VarArgsArePassed */ true)}));
2306       }
2307     }
2308     break;
2309   }
2310   case OMPRTL__kmpc_taskloop: {
2311     // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
2312     // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
2313     // sched, kmp_uint64 grainsize, void *task_dup);
2314     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2315                                 CGM.IntTy,
2316                                 CGM.VoidPtrTy,
2317                                 CGM.IntTy,
2318                                 CGM.Int64Ty->getPointerTo(),
2319                                 CGM.Int64Ty->getPointerTo(),
2320                                 CGM.Int64Ty,
2321                                 CGM.IntTy,
2322                                 CGM.IntTy,
2323                                 CGM.Int64Ty,
2324                                 CGM.VoidPtrTy};
2325     auto *FnTy =
2326         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2327     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
2328     break;
2329   }
2330   case OMPRTL__kmpc_doacross_init: {
2331     // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
2332     // num_dims, struct kmp_dim *dims);
2333     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2334                                 CGM.Int32Ty,
2335                                 CGM.Int32Ty,
2336                                 CGM.VoidPtrTy};
2337     auto *FnTy =
2338         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2339     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
2340     break;
2341   }
2342   case OMPRTL__kmpc_doacross_fini: {
2343     // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
2344     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2345     auto *FnTy =
2346         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2347     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
2348     break;
2349   }
2350   case OMPRTL__kmpc_doacross_post: {
2351     // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
2352     // *vec);
2353     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2354                                 CGM.Int64Ty->getPointerTo()};
2355     auto *FnTy =
2356         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2357     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
2358     break;
2359   }
2360   case OMPRTL__kmpc_doacross_wait: {
2361     // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
2362     // *vec);
2363     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2364                                 CGM.Int64Ty->getPointerTo()};
2365     auto *FnTy =
2366         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2367     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
2368     break;
2369   }
2370   case OMPRTL__kmpc_task_reduction_init: {
2371     // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
2372     // *data);
2373     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
2374     auto *FnTy =
2375         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2376     RTLFn =
2377         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
2378     break;
2379   }
2380   case OMPRTL__kmpc_task_reduction_get_th_data: {
2381     // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
2382     // *d);
2383     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2384     auto *FnTy =
2385         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2386     RTLFn = CGM.CreateRuntimeFunction(
2387         FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
2388     break;
2389   }
2390   case OMPRTL__kmpc_alloc: {
2391     // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t
2392     // al); omp_allocator_handle_t type is void *.
2393     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy};
2394     auto *FnTy =
2395         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2396     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc");
2397     break;
2398   }
2399   case OMPRTL__kmpc_free: {
2400     // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t
2401     // al); omp_allocator_handle_t type is void *.
2402     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2403     auto *FnTy =
2404         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2405     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free");
2406     break;
2407   }
2408   case OMPRTL__kmpc_push_target_tripcount: {
2409     // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
2410     // size);
2411     llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty};
2412     llvm::FunctionType *FnTy =
2413         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2414     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount");
2415     break;
2416   }
2417   case OMPRTL__tgt_target: {
2418     // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
2419     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2420     // *arg_types);
2421     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2422                                 CGM.VoidPtrTy,
2423                                 CGM.Int32Ty,
2424                                 CGM.VoidPtrPtrTy,
2425                                 CGM.VoidPtrPtrTy,
2426                                 CGM.Int64Ty->getPointerTo(),
2427                                 CGM.Int64Ty->getPointerTo()};
2428     auto *FnTy =
2429         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2430     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
2431     break;
2432   }
2433   case OMPRTL__tgt_target_nowait: {
2434     // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
2435     // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2436     // int64_t *arg_types);
2437     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2438                                 CGM.VoidPtrTy,
2439                                 CGM.Int32Ty,
2440                                 CGM.VoidPtrPtrTy,
2441                                 CGM.VoidPtrPtrTy,
2442                                 CGM.Int64Ty->getPointerTo(),
2443                                 CGM.Int64Ty->getPointerTo()};
2444     auto *FnTy =
2445         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2446     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
2447     break;
2448   }
2449   case OMPRTL__tgt_target_teams: {
2450     // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
2451     // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2452     // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2453     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2454                                 CGM.VoidPtrTy,
2455                                 CGM.Int32Ty,
2456                                 CGM.VoidPtrPtrTy,
2457                                 CGM.VoidPtrPtrTy,
2458                                 CGM.Int64Ty->getPointerTo(),
2459                                 CGM.Int64Ty->getPointerTo(),
2460                                 CGM.Int32Ty,
2461                                 CGM.Int32Ty};
2462     auto *FnTy =
2463         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2464     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
2465     break;
2466   }
2467   case OMPRTL__tgt_target_teams_nowait: {
2468     // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
2469     // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
2470     // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2471     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2472                                 CGM.VoidPtrTy,
2473                                 CGM.Int32Ty,
2474                                 CGM.VoidPtrPtrTy,
2475                                 CGM.VoidPtrPtrTy,
2476                                 CGM.Int64Ty->getPointerTo(),
2477                                 CGM.Int64Ty->getPointerTo(),
2478                                 CGM.Int32Ty,
2479                                 CGM.Int32Ty};
2480     auto *FnTy =
2481         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2482     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
2483     break;
2484   }
2485   case OMPRTL__tgt_register_requires: {
2486     // Build void __tgt_register_requires(int64_t flags);
2487     llvm::Type *TypeParams[] = {CGM.Int64Ty};
2488     auto *FnTy =
2489         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2490     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires");
2491     break;
2492   }
2493   case OMPRTL__tgt_target_data_begin: {
2494     // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
2495     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2496     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2497                                 CGM.Int32Ty,
2498                                 CGM.VoidPtrPtrTy,
2499                                 CGM.VoidPtrPtrTy,
2500                                 CGM.Int64Ty->getPointerTo(),
2501                                 CGM.Int64Ty->getPointerTo()};
2502     auto *FnTy =
2503         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2504     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
2505     break;
2506   }
2507   case OMPRTL__tgt_target_data_begin_nowait: {
2508     // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
2509     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2510     // *arg_types);
2511     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2512                                 CGM.Int32Ty,
2513                                 CGM.VoidPtrPtrTy,
2514                                 CGM.VoidPtrPtrTy,
2515                                 CGM.Int64Ty->getPointerTo(),
2516                                 CGM.Int64Ty->getPointerTo()};
2517     auto *FnTy =
2518         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2519     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
2520     break;
2521   }
2522   case OMPRTL__tgt_target_data_end: {
2523     // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
2524     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2525     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2526                                 CGM.Int32Ty,
2527                                 CGM.VoidPtrPtrTy,
2528                                 CGM.VoidPtrPtrTy,
2529                                 CGM.Int64Ty->getPointerTo(),
2530                                 CGM.Int64Ty->getPointerTo()};
2531     auto *FnTy =
2532         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2533     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
2534     break;
2535   }
2536   case OMPRTL__tgt_target_data_end_nowait: {
2537     // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
2538     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2539     // *arg_types);
2540     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2541                                 CGM.Int32Ty,
2542                                 CGM.VoidPtrPtrTy,
2543                                 CGM.VoidPtrPtrTy,
2544                                 CGM.Int64Ty->getPointerTo(),
2545                                 CGM.Int64Ty->getPointerTo()};
2546     auto *FnTy =
2547         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2548     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
2549     break;
2550   }
2551   case OMPRTL__tgt_target_data_update: {
2552     // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
2553     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2554     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2555                                 CGM.Int32Ty,
2556                                 CGM.VoidPtrPtrTy,
2557                                 CGM.VoidPtrPtrTy,
2558                                 CGM.Int64Ty->getPointerTo(),
2559                                 CGM.Int64Ty->getPointerTo()};
2560     auto *FnTy =
2561         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2562     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
2563     break;
2564   }
2565   case OMPRTL__tgt_target_data_update_nowait: {
2566     // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
2567     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2568     // *arg_types);
2569     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2570                                 CGM.Int32Ty,
2571                                 CGM.VoidPtrPtrTy,
2572                                 CGM.VoidPtrPtrTy,
2573                                 CGM.Int64Ty->getPointerTo(),
2574                                 CGM.Int64Ty->getPointerTo()};
2575     auto *FnTy =
2576         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2577     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
2578     break;
2579   }
2580   case OMPRTL__tgt_mapper_num_components: {
2581     // Build int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
2582     llvm::Type *TypeParams[] = {CGM.VoidPtrTy};
2583     auto *FnTy =
2584         llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false);
2585     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_mapper_num_components");
2586     break;
2587   }
2588   case OMPRTL__tgt_push_mapper_component: {
2589     // Build void __tgt_push_mapper_component(void *rt_mapper_handle, void
2590     // *base, void *begin, int64_t size, int64_t type);
2591     llvm::Type *TypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.VoidPtrTy,
2592                                 CGM.Int64Ty, CGM.Int64Ty};
2593     auto *FnTy =
2594         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2595     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_push_mapper_component");
2596     break;
2597   }
2598   }
2599   assert(RTLFn && "Unable to find OpenMP runtime function");
2600   return RTLFn;
2601 }
2602 
2603 llvm::FunctionCallee
2604 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
2605   assert((IVSize == 32 || IVSize == 64) &&
2606          "IV size is not compatible with the omp runtime");
2607   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
2608                                             : "__kmpc_for_static_init_4u")
2609                                 : (IVSigned ? "__kmpc_for_static_init_8"
2610                                             : "__kmpc_for_static_init_8u");
2611   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2612   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2613   llvm::Type *TypeParams[] = {
2614     getIdentTyPointerTy(),                     // loc
2615     CGM.Int32Ty,                               // tid
2616     CGM.Int32Ty,                               // schedtype
2617     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2618     PtrTy,                                     // p_lower
2619     PtrTy,                                     // p_upper
2620     PtrTy,                                     // p_stride
2621     ITy,                                       // incr
2622     ITy                                        // chunk
2623   };
2624   auto *FnTy =
2625       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2626   return CGM.CreateRuntimeFunction(FnTy, Name);
2627 }
2628 
2629 llvm::FunctionCallee
2630 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
2631   assert((IVSize == 32 || IVSize == 64) &&
2632          "IV size is not compatible with the omp runtime");
2633   StringRef Name =
2634       IVSize == 32
2635           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
2636           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
2637   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2638   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
2639                                CGM.Int32Ty,           // tid
2640                                CGM.Int32Ty,           // schedtype
2641                                ITy,                   // lower
2642                                ITy,                   // upper
2643                                ITy,                   // stride
2644                                ITy                    // chunk
2645   };
2646   auto *FnTy =
2647       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2648   return CGM.CreateRuntimeFunction(FnTy, Name);
2649 }
2650 
2651 llvm::FunctionCallee
2652 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
2653   assert((IVSize == 32 || IVSize == 64) &&
2654          "IV size is not compatible with the omp runtime");
2655   StringRef Name =
2656       IVSize == 32
2657           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
2658           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
2659   llvm::Type *TypeParams[] = {
2660       getIdentTyPointerTy(), // loc
2661       CGM.Int32Ty,           // tid
2662   };
2663   auto *FnTy =
2664       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2665   return CGM.CreateRuntimeFunction(FnTy, Name);
2666 }
2667 
2668 llvm::FunctionCallee
2669 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
2670   assert((IVSize == 32 || IVSize == 64) &&
2671          "IV size is not compatible with the omp runtime");
2672   StringRef Name =
2673       IVSize == 32
2674           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
2675           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
2676   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2677   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2678   llvm::Type *TypeParams[] = {
2679     getIdentTyPointerTy(),                     // loc
2680     CGM.Int32Ty,                               // tid
2681     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2682     PtrTy,                                     // p_lower
2683     PtrTy,                                     // p_upper
2684     PtrTy                                      // p_stride
2685   };
2686   auto *FnTy =
2687       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2688   return CGM.CreateRuntimeFunction(FnTy, Name);
2689 }
2690 
2691 /// Obtain information that uniquely identifies a target entry. This
2692 /// consists of the file and device IDs as well as line number associated with
2693 /// the relevant entry source location.
2694 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
2695                                      unsigned &DeviceID, unsigned &FileID,
2696                                      unsigned &LineNum) {
2697   SourceManager &SM = C.getSourceManager();
2698 
2699   // The loc should be always valid and have a file ID (the user cannot use
2700   // #pragma directives in macros)
2701 
2702   assert(Loc.isValid() && "Source location is expected to be always valid.");
2703 
2704   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
2705   assert(PLoc.isValid() && "Source location is expected to be always valid.");
2706 
2707   llvm::sys::fs::UniqueID ID;
2708   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
2709     SM.getDiagnostics().Report(diag::err_cannot_open_file)
2710         << PLoc.getFilename() << EC.message();
2711 
2712   DeviceID = ID.getDevice();
2713   FileID = ID.getFile();
2714   LineNum = PLoc.getLine();
2715 }
2716 
2717 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
2718   if (CGM.getLangOpts().OpenMPSimd)
2719     return Address::invalid();
2720   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
2721       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
2722   if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
2723               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
2724                HasRequiresUnifiedSharedMemory))) {
2725     SmallString<64> PtrName;
2726     {
2727       llvm::raw_svector_ostream OS(PtrName);
2728       OS << CGM.getMangledName(GlobalDecl(VD));
2729       if (!VD->isExternallyVisible()) {
2730         unsigned DeviceID, FileID, Line;
2731         getTargetEntryUniqueInfo(CGM.getContext(),
2732                                  VD->getCanonicalDecl()->getBeginLoc(),
2733                                  DeviceID, FileID, Line);
2734         OS << llvm::format("_%x", FileID);
2735       }
2736       OS << "_decl_tgt_ref_ptr";
2737     }
2738     llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
2739     if (!Ptr) {
2740       QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
2741       Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
2742                                         PtrName);
2743 
2744       auto *GV = cast<llvm::GlobalVariable>(Ptr);
2745       GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
2746 
2747       if (!CGM.getLangOpts().OpenMPIsDevice)
2748         GV->setInitializer(CGM.GetAddrOfGlobal(VD));
2749       registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
2750     }
2751     return Address(Ptr, CGM.getContext().getDeclAlign(VD));
2752   }
2753   return Address::invalid();
2754 }
2755 
2756 llvm::Constant *
2757 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
2758   assert(!CGM.getLangOpts().OpenMPUseTLS ||
2759          !CGM.getContext().getTargetInfo().isTLSSupported());
2760   // Lookup the entry, lazily creating it if necessary.
2761   std::string Suffix = getName({"cache", ""});
2762   return getOrCreateInternalVariable(
2763       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
2764 }
2765 
2766 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
2767                                                 const VarDecl *VD,
2768                                                 Address VDAddr,
2769                                                 SourceLocation Loc) {
2770   if (CGM.getLangOpts().OpenMPUseTLS &&
2771       CGM.getContext().getTargetInfo().isTLSSupported())
2772     return VDAddr;
2773 
2774   llvm::Type *VarTy = VDAddr.getElementType();
2775   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2776                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2777                                                        CGM.Int8PtrTy),
2778                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
2779                          getOrCreateThreadPrivateCache(VD)};
2780   return Address(CGF.EmitRuntimeCall(
2781       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2782                  VDAddr.getAlignment());
2783 }
2784 
2785 void CGOpenMPRuntime::emitThreadPrivateVarInit(
2786     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
2787     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
2788   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
2789   // library.
2790   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
2791   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
2792                       OMPLoc);
2793   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
2794   // to register constructor/destructor for variable.
2795   llvm::Value *Args[] = {
2796       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
2797       Ctor, CopyCtor, Dtor};
2798   CGF.EmitRuntimeCall(
2799       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
2800 }
2801 
/// Emits the runtime registration of the threadprivate variable \p VD for the
/// case where native TLS is not used.
///
/// The work is done at most once per mangled variable name (tracked in
/// ThreadPrivateWithDefinition) and only if \p VD has a definition. A
/// constructor helper is generated when compiling C++ and \p PerformInit is
/// set; a destructor helper is generated when the variable's type needs
/// destruction. If neither helper is needed, nothing is registered.
///
/// \param VD          The threadprivate variable.
/// \param VDAddr      Address of the original variable.
/// \param Loc         Location used for the generated functions and calls.
/// \param PerformInit Whether the initializer must be re-run for each copy.
/// \param CGF         If non-null, the registration calls are emitted into
///                    this function; if null, a standalone
///                    "__omp_threadprivate_init_" function is created.
/// \return The standalone init function when one was created (\p CGF is null
///         and a constructor or destructor is required); nullptr otherwise.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // Nothing to register when native TLS is requested and supported.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Proceed only for a definition whose mangled name has not been seen yet.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // Single 'void *' parameter: the address of the copy to initialize.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      // The helper has the signature 'void *(void *)'.
      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      // Load the destination pointer and view it as a pointer to VD's type.
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Reload the argument and store it as the helper's return value.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // Single 'void *' parameter: the address of the copy to destroy.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      // The helper has the signature 'void (void *)'.
      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      // Use an empty debug location while the function prologue is emitted.
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Substitute a typed null pointer for whichever helper was not generated.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No function to emit into: wrap the registration in a standalone
      // nullary init function and hand it back to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration calls directly into the caller's
    // function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
2921 
2922 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
2923                                                      llvm::GlobalVariable *Addr,
2924                                                      bool PerformInit) {
2925   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
2926       !CGM.getLangOpts().OpenMPIsDevice)
2927     return false;
2928   Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
2929       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
2930   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
2931       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
2932        HasRequiresUnifiedSharedMemory))
2933     return CGM.getLangOpts().OpenMPIsDevice;
2934   VD = VD->getDefinition(CGM.getContext());
2935   if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
2936     return CGM.getLangOpts().OpenMPIsDevice;
2937 
2938   QualType ASTTy = VD->getType();
2939 
2940   SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
2941   // Produce the unique prefix to identify the new target regions. We use
2942   // the source location of the variable declaration which we know to not
2943   // conflict with any target region.
2944   unsigned DeviceID;
2945   unsigned FileID;
2946   unsigned Line;
2947   getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
2948   SmallString<128> Buffer, Out;
2949   {
2950     llvm::raw_svector_ostream OS(Buffer);
2951     OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
2952        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
2953   }
2954 
2955   const Expr *Init = VD->getAnyInitializer();
2956   if (CGM.getLangOpts().CPlusPlus && PerformInit) {
2957     llvm::Constant *Ctor;
2958     llvm::Constant *ID;
2959     if (CGM.getLangOpts().OpenMPIsDevice) {
2960       // Generate function that re-emits the declaration's initializer into
2961       // the threadprivate copy of the variable VD
2962       CodeGenFunction CtorCGF(CGM);
2963 
2964       const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
2965       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2966       llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
2967           FTy, Twine(Buffer, "_ctor"), FI, Loc);
2968       auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
2969       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
2970                             FunctionArgList(), Loc, Loc);
2971       auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
2972       CtorCGF.EmitAnyExprToMem(Init,
2973                                Address(Addr, CGM.getContext().getDeclAlign(VD)),
2974                                Init->getType().getQualifiers(),
2975                                /*IsInitializer=*/true);
2976       CtorCGF.FinishFunction();
2977       Ctor = Fn;
2978       ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
2979       CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
2980     } else {
2981       Ctor = new llvm::GlobalVariable(
2982           CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
2983           llvm::GlobalValue::PrivateLinkage,
2984           llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
2985       ID = Ctor;
2986     }
2987 
2988     // Register the information for the entry associated with the constructor.
2989     Out.clear();
2990     OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
2991         DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
2992         ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
2993   }
2994   if (VD->getType().isDestructedType() != QualType::DK_none) {
2995     llvm::Constant *Dtor;
2996     llvm::Constant *ID;
2997     if (CGM.getLangOpts().OpenMPIsDevice) {
2998       // Generate function that emits destructor call for the threadprivate
2999       // copy of the variable VD
3000       CodeGenFunction DtorCGF(CGM);
3001 
3002       const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
3003       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
3004       llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
3005           FTy, Twine(Buffer, "_dtor"), FI, Loc);
3006       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
3007       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
3008                             FunctionArgList(), Loc, Loc);
3009       // Create a scope with an artificial location for the body of this
3010       // function.
3011       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
3012       DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
3013                           ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
3014                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
3015       DtorCGF.FinishFunction();
3016       Dtor = Fn;
3017       ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
3018       CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
3019     } else {
3020       Dtor = new llvm::GlobalVariable(
3021           CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
3022           llvm::GlobalValue::PrivateLinkage,
3023           llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
3024       ID = Dtor;
3025     }
3026     // Register the information for the entry associated with the destructor.
3027     Out.clear();
3028     OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
3029         DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
3030         ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
3031   }
3032   return CGM.getLangOpts().OpenMPIsDevice;
3033 }
3034 
3035 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
3036                                                           QualType VarType,
3037                                                           StringRef Name) {
3038   std::string Suffix = getName({"artificial", ""});
3039   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
3040   llvm::Value *GAddr =
3041       getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
3042   if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
3043       CGM.getTarget().isTLSSupported()) {
3044     cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
3045     return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
3046   }
3047   std::string CacheSuffix = getName({"cache", ""});
3048   llvm::Value *Args[] = {
3049       emitUpdateLocation(CGF, SourceLocation()),
3050       getThreadID(CGF, SourceLocation()),
3051       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
3052       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
3053                                 /*isSigned=*/false),
3054       getOrCreateInternalVariable(
3055           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
3056   return Address(
3057       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3058           CGF.EmitRuntimeCall(
3059               createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
3060           VarLVType->getPointerTo(/*AddrSpace=*/0)),
3061       CGM.getContext().getTypeAlignInChars(VarType));
3062 }
3063 
3064 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
3065                                    const RegionCodeGenTy &ThenGen,
3066                                    const RegionCodeGenTy &ElseGen) {
3067   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
3068 
3069   // If the condition constant folds and can be elided, try to avoid emitting
3070   // the condition and the dead arm of the if/else.
3071   bool CondConstant;
3072   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
3073     if (CondConstant)
3074       ThenGen(CGF);
3075     else
3076       ElseGen(CGF);
3077     return;
3078   }
3079 
3080   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
3081   // emit the conditional branch.
3082   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
3083   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
3084   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
3085   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
3086 
3087   // Emit the 'then' code.
3088   CGF.EmitBlock(ThenBlock);
3089   ThenGen(CGF);
3090   CGF.EmitBranch(ContBlock);
3091   // Emit the 'else' code if present.
3092   // There is no need to emit line number for unconditional branch.
3093   (void)ApplyDebugLocation::CreateEmpty(CGF);
3094   CGF.EmitBlock(ElseBlock);
3095   ElseGen(CGF);
3096   // There is no need to emit line number for unconditional branch.
3097   (void)ApplyDebugLocation::CreateEmpty(CGF);
3098   CGF.EmitBranch(ContBlock);
3099   // Emit the continuation block for code after the if.
3100   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
3101 }
3102 
3103 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
3104                                        llvm::Function *OutlinedFn,
3105                                        ArrayRef<llvm::Value *> CapturedVars,
3106                                        const Expr *IfCond) {
3107   if (!CGF.HaveInsertPoint())
3108     return;
3109   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
3110   auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
3111                                                      PrePostActionTy &) {
3112     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
3113     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
3114     llvm::Value *Args[] = {
3115         RTLoc,
3116         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
3117         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
3118     llvm::SmallVector<llvm::Value *, 16> RealArgs;
3119     RealArgs.append(std::begin(Args), std::end(Args));
3120     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
3121 
3122     llvm::FunctionCallee RTLFn =
3123         RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
3124     CGF.EmitRuntimeCall(RTLFn, RealArgs);
3125   };
3126   auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
3127                                                           PrePostActionTy &) {
3128     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
3129     llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
3130     // Build calls:
3131     // __kmpc_serialized_parallel(&Loc, GTid);
3132     llvm::Value *Args[] = {RTLoc, ThreadID};
3133     CGF.EmitRuntimeCall(
3134         RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
3135 
3136     // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
3137     Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
3138     Address ZeroAddrBound =
3139         CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
3140                                          /*Name=*/".bound.zero.addr");
3141     CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
3142     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
3143     // ThreadId for serialized parallels is 0.
3144     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
3145     OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
3146     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
3147     RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
3148 
3149     // __kmpc_end_serialized_parallel(&Loc, GTid);
3150     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
3151     CGF.EmitRuntimeCall(
3152         RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
3153         EndArgs);
3154   };
3155   if (IfCond) {
3156     emitIfClause(CGF, IfCond, ThenGen, ElseGen);
3157   } else {
3158     RegionCodeGenTy ThenRCG(ThenGen);
3159     ThenRCG(CGF);
3160   }
3161 }
3162 
3163 // If we're inside an (outlined) parallel region, use the region info's
3164 // thread-ID variable (it is passed in a first argument of the outlined function
3165 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
3166 // regular serial code region, get thread ID by calling kmp_int32
3167 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
3168 // return the address of that temp.
3169 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
3170                                              SourceLocation Loc) {
3171   if (auto *OMPRegionInfo =
3172           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3173     if (OMPRegionInfo->getThreadIDVariable())
3174       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
3175 
3176   llvm::Value *ThreadID = getThreadID(CGF, Loc);
3177   QualType Int32Ty =
3178       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
3179   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
3180   CGF.EmitStoreOfScalar(ThreadID,
3181                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
3182 
3183   return ThreadIDTemp;
3184 }
3185 
3186 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
3187     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
3188   SmallString<256> Buffer;
3189   llvm::raw_svector_ostream Out(Buffer);
3190   Out << Name;
3191   StringRef RuntimeName = Out.str();
3192   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
3193   if (Elem.second) {
3194     assert(Elem.second->getType()->getPointerElementType() == Ty &&
3195            "OMP internal variable has different type than requested");
3196     return &*Elem.second;
3197   }
3198 
3199   return Elem.second = new llvm::GlobalVariable(
3200              CGM.getModule(), Ty, /*IsConstant*/ false,
3201              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
3202              Elem.first(), /*InsertBefore=*/nullptr,
3203              llvm::GlobalValue::NotThreadLocal, AddressSpace);
3204 }
3205 
3206 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
3207   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
3208   std::string Name = getName({Prefix, "var"});
3209   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
3210 }
3211 
3212 namespace {
3213 /// Common pre(post)-action for different OpenMP constructs.
3214 class CommonActionTy final : public PrePostActionTy {
3215   llvm::FunctionCallee EnterCallee;
3216   ArrayRef<llvm::Value *> EnterArgs;
3217   llvm::FunctionCallee ExitCallee;
3218   ArrayRef<llvm::Value *> ExitArgs;
3219   bool Conditional;
3220   llvm::BasicBlock *ContBlock = nullptr;
3221 
3222 public:
3223   CommonActionTy(llvm::FunctionCallee EnterCallee,
3224                  ArrayRef<llvm::Value *> EnterArgs,
3225                  llvm::FunctionCallee ExitCallee,
3226                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
3227       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
3228         ExitArgs(ExitArgs), Conditional(Conditional) {}
3229   void Enter(CodeGenFunction &CGF) override {
3230     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
3231     if (Conditional) {
3232       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
3233       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
3234       ContBlock = CGF.createBasicBlock("omp_if.end");
3235       // Generate the branch (If-stmt)
3236       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
3237       CGF.EmitBlock(ThenBlock);
3238     }
3239   }
3240   void Done(CodeGenFunction &CGF) {
3241     // Emit the rest of blocks/branches
3242     CGF.EmitBranch(ContBlock);
3243     CGF.EmitBlock(ContBlock, true);
3244   }
3245   void Exit(CodeGenFunction &CGF) override {
3246     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
3247   }
3248 };
3249 } // anonymous namespace
3250 
3251 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
3252                                          StringRef CriticalName,
3253                                          const RegionCodeGenTy &CriticalOpGen,
3254                                          SourceLocation Loc, const Expr *Hint) {
3255   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
3256   // CriticalOpGen();
3257   // __kmpc_end_critical(ident_t *, gtid, Lock);
3258   // Prepare arguments and build a call to __kmpc_critical
3259   if (!CGF.HaveInsertPoint())
3260     return;
3261   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3262                          getCriticalRegionLock(CriticalName)};
3263   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
3264                                                 std::end(Args));
3265   if (Hint) {
3266     EnterArgs.push_back(CGF.Builder.CreateIntCast(
3267         CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
3268   }
3269   CommonActionTy Action(
3270       createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
3271                                  : OMPRTL__kmpc_critical),
3272       EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
3273   CriticalOpGen.setAction(Action);
3274   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
3275 }
3276 
3277 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
3278                                        const RegionCodeGenTy &MasterOpGen,
3279                                        SourceLocation Loc) {
3280   if (!CGF.HaveInsertPoint())
3281     return;
3282   // if(__kmpc_master(ident_t *, gtid)) {
3283   //   MasterOpGen();
3284   //   __kmpc_end_master(ident_t *, gtid);
3285   // }
3286   // Prepare arguments and build a call to __kmpc_master
3287   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3288   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
3289                         createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
3290                         /*Conditional=*/true);
3291   MasterOpGen.setAction(Action);
3292   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
3293   Action.Done(CGF);
3294 }
3295 
3296 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
3297                                         SourceLocation Loc) {
3298   if (!CGF.HaveInsertPoint())
3299     return;
3300   llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
3301   if (OMPBuilder) {
3302     OMPBuilder->CreateTaskyield(CGF.Builder);
3303   } else {
3304     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
3305     llvm::Value *Args[] = {
3306         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3307         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
3308     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield),
3309                         Args);
3310   }
3311 
3312   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3313     Region->emitUntiedSwitch(CGF);
3314 }
3315 
3316 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
3317                                           const RegionCodeGenTy &TaskgroupOpGen,
3318                                           SourceLocation Loc) {
3319   if (!CGF.HaveInsertPoint())
3320     return;
3321   // __kmpc_taskgroup(ident_t *, gtid);
3322   // TaskgroupOpGen();
3323   // __kmpc_end_taskgroup(ident_t *, gtid);
3324   // Prepare arguments and build a call to __kmpc_taskgroup
3325   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3326   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
3327                         createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
3328                         Args);
3329   TaskgroupOpGen.setAction(Action);
3330   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
3331 }
3332 
3333 /// Given an array of pointers to variables, project the address of a
3334 /// given variable.
3335 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
3336                                       unsigned Index, const VarDecl *Var) {
3337   // Pull out the pointer to the variable.
3338   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
3339   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
3340 
3341   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
3342   Addr = CGF.Builder.CreateElementBitCast(
3343       Addr, CGF.ConvertTypeForMem(Var->getType()));
3344   return Addr;
3345 }
3346 
3347 static llvm::Value *emitCopyprivateCopyFunction(
3348     CodeGenModule &CGM, llvm::Type *ArgsType,
3349     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
3350     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
3351     SourceLocation Loc) {
3352   ASTContext &C = CGM.getContext();
3353   // void copy_func(void *LHSArg, void *RHSArg);
3354   FunctionArgList Args;
3355   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
3356                            ImplicitParamDecl::Other);
3357   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
3358                            ImplicitParamDecl::Other);
3359   Args.push_back(&LHSArg);
3360   Args.push_back(&RHSArg);
3361   const auto &CGFI =
3362       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3363   std::string Name =
3364       CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
3365   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
3366                                     llvm::GlobalValue::InternalLinkage, Name,
3367                                     &CGM.getModule());
3368   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
3369   Fn->setDoesNotRecurse();
3370   CodeGenFunction CGF(CGM);
3371   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
3372   // Dest = (void*[n])(LHSArg);
3373   // Src = (void*[n])(RHSArg);
3374   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3375       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
3376       ArgsType), CGF.getPointerAlign());
3377   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3378       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
3379       ArgsType), CGF.getPointerAlign());
3380   // *(Type0*)Dst[0] = *(Type0*)Src[0];
3381   // *(Type1*)Dst[1] = *(Type1*)Src[1];
3382   // ...
3383   // *(Typen*)Dst[n] = *(Typen*)Src[n];
3384   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
3385     const auto *DestVar =
3386         cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
3387     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
3388 
3389     const auto *SrcVar =
3390         cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
3391     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
3392 
3393     const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
3394     QualType Type = VD->getType();
3395     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
3396   }
3397   CGF.FinishFunction();
3398   return Fn;
3399 }
3400 
3401 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
3402                                        const RegionCodeGenTy &SingleOpGen,
3403                                        SourceLocation Loc,
3404                                        ArrayRef<const Expr *> CopyprivateVars,
3405                                        ArrayRef<const Expr *> SrcExprs,
3406                                        ArrayRef<const Expr *> DstExprs,
3407                                        ArrayRef<const Expr *> AssignmentOps) {
3408   if (!CGF.HaveInsertPoint())
3409     return;
3410   assert(CopyprivateVars.size() == SrcExprs.size() &&
3411          CopyprivateVars.size() == DstExprs.size() &&
3412          CopyprivateVars.size() == AssignmentOps.size());
3413   ASTContext &C = CGM.getContext();
3414   // int32 did_it = 0;
3415   // if(__kmpc_single(ident_t *, gtid)) {
3416   //   SingleOpGen();
3417   //   __kmpc_end_single(ident_t *, gtid);
3418   //   did_it = 1;
3419   // }
3420   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
3421   // <copy_func>, did_it);
3422 
3423   Address DidIt = Address::invalid();
3424   if (!CopyprivateVars.empty()) {
3425     // int32 did_it = 0;
3426     QualType KmpInt32Ty =
3427         C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3428     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
3429     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
3430   }
3431   // Prepare arguments and build a call to __kmpc_single
3432   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3433   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
3434                         createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
3435                         /*Conditional=*/true);
3436   SingleOpGen.setAction(Action);
3437   emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
3438   if (DidIt.isValid()) {
3439     // did_it = 1;
3440     CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
3441   }
3442   Action.Done(CGF);
3443   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
3444   // <copy_func>, did_it);
3445   if (DidIt.isValid()) {
3446     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
3447     QualType CopyprivateArrayTy = C.getConstantArrayType(
3448         C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
3449         /*IndexTypeQuals=*/0);
3450     // Create a list of all private variables for copyprivate.
3451     Address CopyprivateList =
3452         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
3453     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
3454       Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
3455       CGF.Builder.CreateStore(
3456           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3457               CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
3458               CGF.VoidPtrTy),
3459           Elem);
3460     }
3461     // Build function that copies private values from single region to all other
3462     // threads in the corresponding parallel region.
3463     llvm::Value *CpyFn = emitCopyprivateCopyFunction(
3464         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
3465         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
3466     llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
3467     Address CL =
3468       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
3469                                                       CGF.VoidPtrTy);
3470     llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
3471     llvm::Value *Args[] = {
3472         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
3473         getThreadID(CGF, Loc),        // i32 <gtid>
3474         BufSize,                      // size_t <buf_size>
3475         CL.getPointer(),              // void *<copyprivate list>
3476         CpyFn,                        // void (*) (void *, void *) <copy_func>
3477         DidItVal                      // i32 did_it
3478     };
3479     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
3480   }
3481 }
3482 
3483 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
3484                                         const RegionCodeGenTy &OrderedOpGen,
3485                                         SourceLocation Loc, bool IsThreads) {
3486   if (!CGF.HaveInsertPoint())
3487     return;
3488   // __kmpc_ordered(ident_t *, gtid);
3489   // OrderedOpGen();
3490   // __kmpc_end_ordered(ident_t *, gtid);
3491   // Prepare arguments and build a call to __kmpc_ordered
3492   if (IsThreads) {
3493     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3494     CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
3495                           createRuntimeFunction(OMPRTL__kmpc_end_ordered),
3496                           Args);
3497     OrderedOpGen.setAction(Action);
3498     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3499     return;
3500   }
3501   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3502 }
3503 
3504 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
3505   unsigned Flags;
3506   if (Kind == OMPD_for)
3507     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
3508   else if (Kind == OMPD_sections)
3509     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
3510   else if (Kind == OMPD_single)
3511     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
3512   else if (Kind == OMPD_barrier)
3513     Flags = OMP_IDENT_BARRIER_EXPL;
3514   else
3515     Flags = OMP_IDENT_BARRIER_IMPL;
3516   return Flags;
3517 }
3518 
3519 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
3520     CodeGenFunction &CGF, const OMPLoopDirective &S,
3521     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
3522   // Check if the loop directive is actually a doacross loop directive. In this
3523   // case choose static, 1 schedule.
3524   if (llvm::any_of(
3525           S.getClausesOfKind<OMPOrderedClause>(),
3526           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
3527     ScheduleKind = OMPC_SCHEDULE_static;
3528     // Chunk size is 1 in this case.
3529     llvm::APInt ChunkSize(32, 1);
3530     ChunkExpr = IntegerLiteral::Create(
3531         CGF.getContext(), ChunkSize,
3532         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
3533         SourceLocation());
3534   }
3535 }
3536 
3537 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
3538                                       OpenMPDirectiveKind Kind, bool EmitChecks,
3539                                       bool ForceSimpleCall) {
3540   // Check if we should use the OMPBuilder
3541   auto *OMPRegionInfo =
3542       dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
3543   llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
3544   if (OMPBuilder) {
3545     CGF.Builder.restoreIP(OMPBuilder->CreateBarrier(
3546         CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
3547     return;
3548   }
3549 
3550   if (!CGF.HaveInsertPoint())
3551     return;
3552   // Build call __kmpc_cancel_barrier(loc, thread_id);
3553   // Build call __kmpc_barrier(loc, thread_id);
3554   unsigned Flags = getDefaultFlagsForBarriers(Kind);
3555   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
3556   // thread_id);
3557   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
3558                          getThreadID(CGF, Loc)};
3559   if (OMPRegionInfo) {
3560     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
3561       llvm::Value *Result = CGF.EmitRuntimeCall(
3562           createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
3563       if (EmitChecks) {
3564         // if (__kmpc_cancel_barrier()) {
3565         //   exit from construct;
3566         // }
3567         llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
3568         llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
3569         llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
3570         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
3571         CGF.EmitBlock(ExitBB);
3572         //   exit from construct;
3573         CodeGenFunction::JumpDest CancelDestination =
3574             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
3575         CGF.EmitBranchThroughCleanup(CancelDestination);
3576         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
3577       }
3578       return;
3579     }
3580   }
3581   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
3582 }
3583 
3584 /// Map the OpenMP loop schedule to the runtime enumeration.
3585 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
3586                                           bool Chunked, bool Ordered) {
3587   switch (ScheduleKind) {
3588   case OMPC_SCHEDULE_static:
3589     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
3590                    : (Ordered ? OMP_ord_static : OMP_sch_static);
3591   case OMPC_SCHEDULE_dynamic:
3592     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
3593   case OMPC_SCHEDULE_guided:
3594     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
3595   case OMPC_SCHEDULE_runtime:
3596     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
3597   case OMPC_SCHEDULE_auto:
3598     return Ordered ? OMP_ord_auto : OMP_sch_auto;
3599   case OMPC_SCHEDULE_unknown:
3600     assert(!Chunked && "chunk was specified but schedule kind not known");
3601     return Ordered ? OMP_ord_static : OMP_sch_static;
3602   }
3603   llvm_unreachable("Unexpected runtime schedule");
3604 }
3605 
3606 /// Map the OpenMP distribute schedule to the runtime enumeration.
3607 static OpenMPSchedType
3608 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
3609   // only static is allowed for dist_schedule
3610   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
3611 }
3612 
3613 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
3614                                          bool Chunked) const {
3615   OpenMPSchedType Schedule =
3616       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3617   return Schedule == OMP_sch_static;
3618 }
3619 
3620 bool CGOpenMPRuntime::isStaticNonchunked(
3621     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3622   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3623   return Schedule == OMP_dist_sch_static;
3624 }
3625 
3626 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
3627                                       bool Chunked) const {
3628   OpenMPSchedType Schedule =
3629       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3630   return Schedule == OMP_sch_static_chunked;
3631 }
3632 
3633 bool CGOpenMPRuntime::isStaticChunked(
3634     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3635   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3636   return Schedule == OMP_dist_sch_static_chunked;
3637 }
3638 
3639 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
3640   OpenMPSchedType Schedule =
3641       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
3642   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
3643   return Schedule != OMP_sch_static;
3644 }
3645 
3646 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
3647                                   OpenMPScheduleClauseModifier M1,
3648                                   OpenMPScheduleClauseModifier M2) {
3649   int Modifier = 0;
3650   switch (M1) {
3651   case OMPC_SCHEDULE_MODIFIER_monotonic:
3652     Modifier = OMP_sch_modifier_monotonic;
3653     break;
3654   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3655     Modifier = OMP_sch_modifier_nonmonotonic;
3656     break;
3657   case OMPC_SCHEDULE_MODIFIER_simd:
3658     if (Schedule == OMP_sch_static_chunked)
3659       Schedule = OMP_sch_static_balanced_chunked;
3660     break;
3661   case OMPC_SCHEDULE_MODIFIER_last:
3662   case OMPC_SCHEDULE_MODIFIER_unknown:
3663     break;
3664   }
3665   switch (M2) {
3666   case OMPC_SCHEDULE_MODIFIER_monotonic:
3667     Modifier = OMP_sch_modifier_monotonic;
3668     break;
3669   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3670     Modifier = OMP_sch_modifier_nonmonotonic;
3671     break;
3672   case OMPC_SCHEDULE_MODIFIER_simd:
3673     if (Schedule == OMP_sch_static_chunked)
3674       Schedule = OMP_sch_static_balanced_chunked;
3675     break;
3676   case OMPC_SCHEDULE_MODIFIER_last:
3677   case OMPC_SCHEDULE_MODIFIER_unknown:
3678     break;
3679   }
3680   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
3681   // If the static schedule kind is specified or if the ordered clause is
3682   // specified, and if the nonmonotonic modifier is not specified, the effect is
3683   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
3684   // modifier is specified, the effect is as if the nonmonotonic modifier is
3685   // specified.
3686   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
3687     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
3688           Schedule == OMP_sch_static_balanced_chunked ||
3689           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
3690           Schedule == OMP_dist_sch_static_chunked ||
3691           Schedule == OMP_dist_sch_static))
3692       Modifier = OMP_sch_modifier_nonmonotonic;
3693   }
3694   return Schedule | Modifier;
3695 }
3696 
3697 void CGOpenMPRuntime::emitForDispatchInit(
3698     CodeGenFunction &CGF, SourceLocation Loc,
3699     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
3700     bool Ordered, const DispatchRTInput &DispatchValues) {
3701   if (!CGF.HaveInsertPoint())
3702     return;
3703   OpenMPSchedType Schedule = getRuntimeSchedule(
3704       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
3705   assert(Ordered ||
3706          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
3707           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
3708           Schedule != OMP_sch_static_balanced_chunked));
3709   // Call __kmpc_dispatch_init(
3710   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
3711   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
3712   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
3713 
3714   // If the Chunk was not specified in the clause - use default value 1.
3715   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
3716                                             : CGF.Builder.getIntN(IVSize, 1);
3717   llvm::Value *Args[] = {
3718       emitUpdateLocation(CGF, Loc),
3719       getThreadID(CGF, Loc),
3720       CGF.Builder.getInt32(addMonoNonMonoModifier(
3721           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
3722       DispatchValues.LB,                                     // Lower
3723       DispatchValues.UB,                                     // Upper
3724       CGF.Builder.getIntN(IVSize, 1),                        // Stride
3725       Chunk                                                  // Chunk
3726   };
3727   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
3728 }
3729 
3730 static void emitForStaticInitCall(
3731     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
3732     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
3733     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
3734     const CGOpenMPRuntime::StaticRTInput &Values) {
3735   if (!CGF.HaveInsertPoint())
3736     return;
3737 
3738   assert(!Values.Ordered);
3739   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
3740          Schedule == OMP_sch_static_balanced_chunked ||
3741          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
3742          Schedule == OMP_dist_sch_static ||
3743          Schedule == OMP_dist_sch_static_chunked);
3744 
3745   // Call __kmpc_for_static_init(
3746   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
3747   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
3748   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
3749   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
3750   llvm::Value *Chunk = Values.Chunk;
3751   if (Chunk == nullptr) {
3752     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
3753             Schedule == OMP_dist_sch_static) &&
3754            "expected static non-chunked schedule");
3755     // If the Chunk was not specified in the clause - use default value 1.
3756     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
3757   } else {
3758     assert((Schedule == OMP_sch_static_chunked ||
3759             Schedule == OMP_sch_static_balanced_chunked ||
3760             Schedule == OMP_ord_static_chunked ||
3761             Schedule == OMP_dist_sch_static_chunked) &&
3762            "expected static chunked schedule");
3763   }
3764   llvm::Value *Args[] = {
3765       UpdateLocation,
3766       ThreadId,
3767       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
3768                                                   M2)), // Schedule type
3769       Values.IL.getPointer(),                           // &isLastIter
3770       Values.LB.getPointer(),                           // &LB
3771       Values.UB.getPointer(),                           // &UB
3772       Values.ST.getPointer(),                           // &Stride
3773       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
3774       Chunk                                             // Chunk
3775   };
3776   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
3777 }
3778 
3779 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
3780                                         SourceLocation Loc,
3781                                         OpenMPDirectiveKind DKind,
3782                                         const OpenMPScheduleTy &ScheduleKind,
3783                                         const StaticRTInput &Values) {
3784   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
3785       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
3786   assert(isOpenMPWorksharingDirective(DKind) &&
3787          "Expected loop-based or sections-based directive.");
3788   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
3789                                              isOpenMPLoopDirective(DKind)
3790                                                  ? OMP_IDENT_WORK_LOOP
3791                                                  : OMP_IDENT_WORK_SECTIONS);
3792   llvm::Value *ThreadId = getThreadID(CGF, Loc);
3793   llvm::FunctionCallee StaticInitFunction =
3794       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3795   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
3796   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3797                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
3798 }
3799 
3800 void CGOpenMPRuntime::emitDistributeStaticInit(
3801     CodeGenFunction &CGF, SourceLocation Loc,
3802     OpenMPDistScheduleClauseKind SchedKind,
3803     const CGOpenMPRuntime::StaticRTInput &Values) {
3804   OpenMPSchedType ScheduleNum =
3805       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
3806   llvm::Value *UpdatedLocation =
3807       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
3808   llvm::Value *ThreadId = getThreadID(CGF, Loc);
3809   llvm::FunctionCallee StaticInitFunction =
3810       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3811   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3812                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
3813                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
3814 }
3815 
3816 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
3817                                           SourceLocation Loc,
3818                                           OpenMPDirectiveKind DKind) {
3819   if (!CGF.HaveInsertPoint())
3820     return;
3821   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
3822   llvm::Value *Args[] = {
3823       emitUpdateLocation(CGF, Loc,
3824                          isOpenMPDistributeDirective(DKind)
3825                              ? OMP_IDENT_WORK_DISTRIBUTE
3826                              : isOpenMPLoopDirective(DKind)
3827                                    ? OMP_IDENT_WORK_LOOP
3828                                    : OMP_IDENT_WORK_SECTIONS),
3829       getThreadID(CGF, Loc)};
3830   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
3831   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
3832                       Args);
3833 }
3834 
3835 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
3836                                                  SourceLocation Loc,
3837                                                  unsigned IVSize,
3838                                                  bool IVSigned) {
3839   if (!CGF.HaveInsertPoint())
3840     return;
3841   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
3842   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3843   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
3844 }
3845 
3846 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
3847                                           SourceLocation Loc, unsigned IVSize,
3848                                           bool IVSigned, Address IL,
3849                                           Address LB, Address UB,
3850                                           Address ST) {
3851   // Call __kmpc_dispatch_next(
3852   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
3853   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
3854   //          kmp_int[32|64] *p_stride);
3855   llvm::Value *Args[] = {
3856       emitUpdateLocation(CGF, Loc),
3857       getThreadID(CGF, Loc),
3858       IL.getPointer(), // &isLastIter
3859       LB.getPointer(), // &Lower
3860       UB.getPointer(), // &Upper
3861       ST.getPointer()  // &Stride
3862   };
3863   llvm::Value *Call =
3864       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
3865   return CGF.EmitScalarConversion(
3866       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
3867       CGF.getContext().BoolTy, Loc);
3868 }
3869 
3870 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
3871                                            llvm::Value *NumThreads,
3872                                            SourceLocation Loc) {
3873   if (!CGF.HaveInsertPoint())
3874     return;
3875   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
3876   llvm::Value *Args[] = {
3877       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3878       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
3879   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
3880                       Args);
3881 }
3882 
3883 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
3884                                          ProcBindKind ProcBind,
3885                                          SourceLocation Loc) {
3886   if (!CGF.HaveInsertPoint())
3887     return;
3888   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
3889   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
3890   llvm::Value *Args[] = {
3891       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3892       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
3893   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
3894 }
3895 
3896 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
3897                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
3898   llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
3899   if (OMPBuilder) {
3900     OMPBuilder->CreateFlush(CGF.Builder);
3901   } else {
3902     if (!CGF.HaveInsertPoint())
3903       return;
3904     // Build call void __kmpc_flush(ident_t *loc)
3905     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
3906                         emitUpdateLocation(CGF, Loc));
3907   }
3908 }
3909 
namespace {
/// Indexes of fields for type kmp_task_t.
///
/// The enumerators carry no explicit values, so they are numbered from 0 in
/// declaration order; reordering them changes every index.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  /// NOTE(review): the enumerator name does not reflect this role; confirm
  /// how this slot is laid out in the runtime's kmp_task_t.
  Data1,
  /// Task priority.
  /// NOTE(review): as with Data1, the name is generic; confirm against the
  /// runtime's kmp_task_t definition.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
3935 
3936 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3937   return OffloadEntriesTargetRegion.empty() &&
3938          OffloadEntriesDeviceGlobalVar.empty();
3939 }
3940 
3941 /// Initialize target region entry.
3942 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3943     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3944                                     StringRef ParentName, unsigned LineNum,
3945                                     unsigned Order) {
3946   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3947                                              "only required for the device "
3948                                              "code generation.");
3949   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3950       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3951                                    OMPTargetRegionEntryTargetRegion);
3952   ++OffloadingEntriesNum;
3953 }
3954 
3955 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3956     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3957                                   StringRef ParentName, unsigned LineNum,
3958                                   llvm::Constant *Addr, llvm::Constant *ID,
3959                                   OMPTargetRegionEntryKind Flags) {
3960   // If we are emitting code for a target, the entry is already initialized,
3961   // only has to be registered.
3962   if (CGM.getLangOpts().OpenMPIsDevice) {
3963     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
3964       unsigned DiagID = CGM.getDiags().getCustomDiagID(
3965           DiagnosticsEngine::Error,
3966           "Unable to find target region on line '%0' in the device code.");
3967       CGM.getDiags().Report(DiagID) << LineNum;
3968       return;
3969     }
3970     auto &Entry =
3971         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3972     assert(Entry.isValid() && "Entry not initialized!");
3973     Entry.setAddress(Addr);
3974     Entry.setID(ID);
3975     Entry.setFlags(Flags);
3976   } else {
3977     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3978     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3979     ++OffloadingEntriesNum;
3980   }
3981 }
3982 
3983 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3984     unsigned DeviceID, unsigned FileID, StringRef ParentName,
3985     unsigned LineNum) const {
3986   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3987   if (PerDevice == OffloadEntriesTargetRegion.end())
3988     return false;
3989   auto PerFile = PerDevice->second.find(FileID);
3990   if (PerFile == PerDevice->second.end())
3991     return false;
3992   auto PerParentName = PerFile->second.find(ParentName);
3993   if (PerParentName == PerFile->second.end())
3994     return false;
3995   auto PerLine = PerParentName->second.find(LineNum);
3996   if (PerLine == PerParentName->second.end())
3997     return false;
3998   // Fail if this entry is already registered.
3999   if (PerLine->second.getAddress() || PerLine->second.getID())
4000     return false;
4001   return true;
4002 }
4003 
4004 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
4005     const OffloadTargetRegionEntryInfoActTy &Action) {
4006   // Scan all target region entries and perform the provided action.
4007   for (const auto &D : OffloadEntriesTargetRegion)
4008     for (const auto &F : D.second)
4009       for (const auto &P : F.second)
4010         for (const auto &L : P.second)
4011           Action(D.first, F.first, P.first(), L.first, L.second);
4012 }
4013 
4014 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
4015     initializeDeviceGlobalVarEntryInfo(StringRef Name,
4016                                        OMPTargetGlobalVarEntryKind Flags,
4017                                        unsigned Order) {
4018   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
4019                                              "only required for the device "
4020                                              "code generation.");
4021   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
4022   ++OffloadingEntriesNum;
4023 }
4024 
4025 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
4026     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
4027                                      CharUnits VarSize,
4028                                      OMPTargetGlobalVarEntryKind Flags,
4029                                      llvm::GlobalValue::LinkageTypes Linkage) {
4030   if (CGM.getLangOpts().OpenMPIsDevice) {
4031     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
4032     assert(Entry.isValid() && Entry.getFlags() == Flags &&
4033            "Entry not initialized!");
4034     assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
4035            "Resetting with the new address.");
4036     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
4037       if (Entry.getVarSize().isZero()) {
4038         Entry.setVarSize(VarSize);
4039         Entry.setLinkage(Linkage);
4040       }
4041       return;
4042     }
4043     Entry.setVarSize(VarSize);
4044     Entry.setLinkage(Linkage);
4045     Entry.setAddress(Addr);
4046   } else {
4047     if (hasDeviceGlobalVarEntryInfo(VarName)) {
4048       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
4049       assert(Entry.isValid() && Entry.getFlags() == Flags &&
4050              "Entry not initialized!");
4051       assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
4052              "Resetting with the new address.");
4053       if (Entry.getVarSize().isZero()) {
4054         Entry.setVarSize(VarSize);
4055         Entry.setLinkage(Linkage);
4056       }
4057       return;
4058     }
4059     OffloadEntriesDeviceGlobalVar.try_emplace(
4060         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
4061     ++OffloadingEntriesNum;
4062   }
4063 }
4064 
4065 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
4066     actOnDeviceGlobalVarEntriesInfo(
4067         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
4068   // Scan all target region entries and perform the provided action.
4069   for (const auto &E : OffloadEntriesDeviceGlobalVar)
4070     Action(E.getKey(), E.getValue());
4071 }
4072 
4073 void CGOpenMPRuntime::createOffloadEntry(
4074     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
4075     llvm::GlobalValue::LinkageTypes Linkage) {
4076   StringRef Name = Addr->getName();
4077   llvm::Module &M = CGM.getModule();
4078   llvm::LLVMContext &C = M.getContext();
4079 
4080   // Create constant string with the name.
4081   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
4082 
4083   std::string StringName = getName({"omp_offloading", "entry_name"});
4084   auto *Str = new llvm::GlobalVariable(
4085       M, StrPtrInit->getType(), /*isConstant=*/true,
4086       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
4087   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
4088 
4089   llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
4090                             llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
4091                             llvm::ConstantInt::get(CGM.SizeTy, Size),
4092                             llvm::ConstantInt::get(CGM.Int32Ty, Flags),
4093                             llvm::ConstantInt::get(CGM.Int32Ty, 0)};
4094   std::string EntryName = getName({"omp_offloading", "entry", ""});
4095   llvm::GlobalVariable *Entry = createGlobalStruct(
4096       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
4097       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
4098 
4099   // The entry has to be created in the section the linker expects it to be.
4100   Entry->setSection("omp_offloading_entries");
4101 }
4102 
/// Emit the "omp_offload.info" named metadata describing every registered
/// offload entry, then create the offload entry globals in registration
/// order.
///
/// Returns immediately in simd-only mode or when no entries are registered.
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Metadata nodes are generated for target region entries and for device
  // global variable entries (see the two emitter lambdas below).

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries indexed by their order; each slot holds the entry, a source
  // location for diagnostics and the parent function / mangled variable name.
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  // Parent function names, indexed by entry order. Only the slots of target
  // region entries are filled in.
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Translate (DeviceID, FileID, Line) back into a source location by
        // searching the source manager's file infos for a matching unique ID.
        // Loc stays invalid when no file matches.
        // NOTE: inside this loop the end iterator `E` shadows the entry
        // parameter `E`; the parameter is visible again after the loop.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        // No source location is recorded for global variable entries.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Walk the entries in creation order and emit an offload entry for each
  // one that is complete.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      // Target regions are emitted with size 0.
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      // Each case either `continue`s (no entry is emitted) or `break`s to
      // fall through to the createOffloadEntry call below the switch.
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        // No entry is emitted on the device when unified shared memory is
        // required.
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        // Expected state: no address on the device, an address on the host.
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        // Unlike the 'to' case above, this diagnostic carries neither a
        // source location nor the variable name.
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      // For global variables the address doubles as the entry ID.
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
4276 
4277 /// Loads all the offload entries information from the host IR
4278 /// metadata.
4279 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
4280   // If we are in target mode, load the metadata from the host IR. This code has
4281   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
4282 
4283   if (!CGM.getLangOpts().OpenMPIsDevice)
4284     return;
4285 
4286   if (CGM.getLangOpts().OMPHostIRFile.empty())
4287     return;
4288 
4289   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
4290   if (auto EC = Buf.getError()) {
4291     CGM.getDiags().Report(diag::err_cannot_open_file)
4292         << CGM.getLangOpts().OMPHostIRFile << EC.message();
4293     return;
4294   }
4295 
4296   llvm::LLVMContext C;
4297   auto ME = expectedToErrorOrAndEmitErrors(
4298       C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
4299 
4300   if (auto EC = ME.getError()) {
4301     unsigned DiagID = CGM.getDiags().getCustomDiagID(
4302         DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
4303     CGM.getDiags().Report(DiagID)
4304         << CGM.getLangOpts().OMPHostIRFile << EC.message();
4305     return;
4306   }
4307 
4308   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
4309   if (!MD)
4310     return;
4311 
4312   for (llvm::MDNode *MN : MD->operands()) {
4313     auto &&GetMDInt = [MN](unsigned Idx) {
4314       auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
4315       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
4316     };
4317 
4318     auto &&GetMDString = [MN](unsigned Idx) {
4319       auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
4320       return V->getString();
4321     };
4322 
4323     switch (GetMDInt(0)) {
4324     default:
4325       llvm_unreachable("Unexpected metadata!");
4326       break;
4327     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
4328         OffloadingEntryInfoTargetRegion:
4329       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
4330           /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
4331           /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
4332           /*Order=*/GetMDInt(5));
4333       break;
4334     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
4335         OffloadingEntryInfoDeviceGlobalVar:
4336       OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
4337           /*MangledName=*/GetMDString(1),
4338           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
4339               /*Flags=*/GetMDInt(2)),
4340           /*Order=*/GetMDInt(3));
4341       break;
4342     }
4343   }
4344 }
4345 
4346 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
4347   if (!KmpRoutineEntryPtrTy) {
4348     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
4349     ASTContext &C = CGM.getContext();
4350     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
4351     FunctionProtoType::ExtProtoInfo EPI;
4352     KmpRoutineEntryPtrQTy = C.getPointerType(
4353         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
4354     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
4355   }
4356 }
4357 
4358 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
4359   // Make sure the type of the entry is already created. This is the type we
4360   // have to create:
4361   // struct __tgt_offload_entry{
4362   //   void      *addr;       // Pointer to the offload entry info.
4363   //                          // (function or global)
4364   //   char      *name;       // Name of the function or global.
4365   //   size_t     size;       // Size of the entry info (0 if it a function).
4366   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
4367   //   int32_t    reserved;   // Reserved, to use by the runtime library.
4368   // };
4369   if (TgtOffloadEntryQTy.isNull()) {
4370     ASTContext &C = CGM.getContext();
4371     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
4372     RD->startDefinition();
4373     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4374     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
4375     addFieldToRecordDecl(C, RD, C.getSizeType());
4376     addFieldToRecordDecl(
4377         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4378     addFieldToRecordDecl(
4379         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4380     RD->completeDefinition();
4381     RD->addAttr(PackedAttr::CreateImplicit(C));
4382     TgtOffloadEntryQTy = C.getRecordType(RD);
4383   }
4384   return TgtOffloadEntryQTy;
4385 }
4386 
4387 namespace {
4388 struct PrivateHelpersTy {
4389   PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
4390                    const VarDecl *PrivateElemInit)
4391       : Original(Original), PrivateCopy(PrivateCopy),
4392         PrivateElemInit(PrivateElemInit) {}
4393   const VarDecl *Original;
4394   const VarDecl *PrivateCopy;
4395   const VarDecl *PrivateElemInit;
4396 };
4397 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
4398 } // anonymous namespace
4399 
4400 static RecordDecl *
4401 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
4402   if (!Privates.empty()) {
4403     ASTContext &C = CGM.getContext();
4404     // Build struct .kmp_privates_t. {
4405     //         /*  private vars  */
4406     //       };
4407     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
4408     RD->startDefinition();
4409     for (const auto &Pair : Privates) {
4410       const VarDecl *VD = Pair.second.Original;
4411       QualType Type = VD->getType().getNonReferenceType();
4412       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
4413       if (VD->hasAttrs()) {
4414         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
4415              E(VD->getAttrs().end());
4416              I != E; ++I)
4417           FD->addAttr(*I);
4418       }
4419     }
4420     RD->completeDefinition();
4421     return RD;
4422   }
4423   return nullptr;
4424 }
4425 
4426 static RecordDecl *
4427 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
4428                          QualType KmpInt32Ty,
4429                          QualType KmpRoutineEntryPointerQTy) {
4430   ASTContext &C = CGM.getContext();
4431   // Build struct kmp_task_t {
4432   //         void *              shareds;
4433   //         kmp_routine_entry_t routine;
4434   //         kmp_int32           part_id;
4435   //         kmp_cmplrdata_t data1;
4436   //         kmp_cmplrdata_t data2;
4437   // For taskloops additional fields:
4438   //         kmp_uint64          lb;
4439   //         kmp_uint64          ub;
4440   //         kmp_int64           st;
4441   //         kmp_int32           liter;
4442   //         void *              reductions;
4443   //       };
4444   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
4445   UD->startDefinition();
4446   addFieldToRecordDecl(C, UD, KmpInt32Ty);
4447   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
4448   UD->completeDefinition();
4449   QualType KmpCmplrdataTy = C.getRecordType(UD);
4450   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
4451   RD->startDefinition();
4452   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4453   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
4454   addFieldToRecordDecl(C, RD, KmpInt32Ty);
4455   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4456   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4457   if (isOpenMPTaskLoopDirective(Kind)) {
4458     QualType KmpUInt64Ty =
4459         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
4460     QualType KmpInt64Ty =
4461         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
4462     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4463     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4464     addFieldToRecordDecl(C, RD, KmpInt64Ty);
4465     addFieldToRecordDecl(C, RD, KmpInt32Ty);
4466     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4467   }
4468   RD->completeDefinition();
4469   return RD;
4470 }
4471 
4472 static RecordDecl *
4473 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
4474                                      ArrayRef<PrivateDataTy> Privates) {
4475   ASTContext &C = CGM.getContext();
4476   // Build struct kmp_task_t_with_privates {
4477   //         kmp_task_t task_data;
4478   //         .kmp_privates_t. privates;
4479   //       };
4480   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
4481   RD->startDefinition();
4482   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
4483   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
4484     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
4485   RD->completeDefinition();
4486   return RD;
4487 }
4488 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, &tt->task_data.part_id, &tt->privates,
///                task_privates_map, tt,
///                // For taskloops only:
///                tt->task_data.lb, tt->task_data.ub, tt->task_data.st,
///                tt->task_data.liter, tt->task_data.reductions,
///                // Always last:
///                tt->task_data.shareds);
///   return 0;
/// }
/// \endcode
/// The privates argument is a null pointer when the task record has no
/// privates field. The emitted function has internal linkage and is marked
/// as non-recursive.
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Signature: kmp_int32 (kmp_int32 gtid, kmp_task_t_with_privates *restrict).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, &tt->task_data.part_id, &tt->privates,
  // task_privates_map, tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.reductions,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // TDBase is the whole kmp_task_t_with_privates object; Base is its first
  // field, the embedded kmp_task_t.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address, not by value.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  // shareds is loaded and cast to the pointer type given by SharedsPtrTy.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates field is the optional second field of the wrapper record;
  // pass its address as void*, or null when the record has no such field.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  // Arguments common to tasks and taskloops; the last one is the task object
  // itself as void*.
  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  // Taskloops additionally pass the loaded values of lower bound, upper
  // bound, stride, last-iteration flag and reductions, in that order.
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  // shareds is always the final argument.
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The proxy always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
4603 
4604 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
4605                                             SourceLocation Loc,
4606                                             QualType KmpInt32Ty,
4607                                             QualType KmpTaskTWithPrivatesPtrQTy,
4608                                             QualType KmpTaskTWithPrivatesQTy) {
4609   ASTContext &C = CGM.getContext();
4610   FunctionArgList Args;
4611   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
4612                             ImplicitParamDecl::Other);
4613   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4614                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
4615                                 ImplicitParamDecl::Other);
4616   Args.push_back(&GtidArg);
4617   Args.push_back(&TaskTypeArg);
4618   const auto &DestructorFnInfo =
4619       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
4620   llvm::FunctionType *DestructorFnTy =
4621       CGM.getTypes().GetFunctionType(DestructorFnInfo);
4622   std::string Name =
4623       CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
4624   auto *DestructorFn =
4625       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
4626                              Name, &CGM.getModule());
4627   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
4628                                     DestructorFnInfo);
4629   DestructorFn->setDoesNotRecurse();
4630   CodeGenFunction CGF(CGM);
4631   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
4632                     Args, Loc, Loc);
4633 
4634   LValue Base = CGF.EmitLoadOfPointerLValue(
4635       CGF.GetAddrOfLocalVar(&TaskTypeArg),
4636       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4637   const auto *KmpTaskTWithPrivatesQTyRD =
4638       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
4639   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4640   Base = CGF.EmitLValueForField(Base, *FI);
4641   for (const auto *Field :
4642        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
4643     if (QualType::DestructionKind DtorKind =
4644             Field->getType().isDestructedType()) {
4645       LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
4646       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
4647     }
4648   }
4649   CGF.FinishFunction();
4650   return DestructorFn;
4651 }
4652 
4653 /// Emit a privates mapping function for correct handling of private and
4654 /// firstprivate variables.
4655 /// \code
4656 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
4657 /// **noalias priv1,...,  <tyn> **noalias privn) {
4658 ///   *priv1 = &.privates.priv1;
4659 ///   ...;
4660 ///   *privn = &.privates.privn;
4661 /// }
4662 /// \endcode
4663 static llvm::Value *
4664 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
4665                                ArrayRef<const Expr *> PrivateVars,
4666                                ArrayRef<const Expr *> FirstprivateVars,
4667                                ArrayRef<const Expr *> LastprivateVars,
4668                                QualType PrivatesQTy,
4669                                ArrayRef<PrivateDataTy> Privates) {
4670   ASTContext &C = CGM.getContext();
4671   FunctionArgList Args;
4672   ImplicitParamDecl TaskPrivatesArg(
4673       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4674       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
4675       ImplicitParamDecl::Other);
4676   Args.push_back(&TaskPrivatesArg);
4677   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
4678   unsigned Counter = 1;
4679   for (const Expr *E : PrivateVars) {
4680     Args.push_back(ImplicitParamDecl::Create(
4681         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4682         C.getPointerType(C.getPointerType(E->getType()))
4683             .withConst()
4684             .withRestrict(),
4685         ImplicitParamDecl::Other));
4686     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4687     PrivateVarsPos[VD] = Counter;
4688     ++Counter;
4689   }
4690   for (const Expr *E : FirstprivateVars) {
4691     Args.push_back(ImplicitParamDecl::Create(
4692         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4693         C.getPointerType(C.getPointerType(E->getType()))
4694             .withConst()
4695             .withRestrict(),
4696         ImplicitParamDecl::Other));
4697     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4698     PrivateVarsPos[VD] = Counter;
4699     ++Counter;
4700   }
4701   for (const Expr *E : LastprivateVars) {
4702     Args.push_back(ImplicitParamDecl::Create(
4703         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4704         C.getPointerType(C.getPointerType(E->getType()))
4705             .withConst()
4706             .withRestrict(),
4707         ImplicitParamDecl::Other));
4708     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4709     PrivateVarsPos[VD] = Counter;
4710     ++Counter;
4711   }
4712   const auto &TaskPrivatesMapFnInfo =
4713       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4714   llvm::FunctionType *TaskPrivatesMapTy =
4715       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
4716   std::string Name =
4717       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
4718   auto *TaskPrivatesMap = llvm::Function::Create(
4719       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
4720       &CGM.getModule());
4721   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
4722                                     TaskPrivatesMapFnInfo);
4723   if (CGM.getLangOpts().Optimize) {
4724     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
4725     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
4726     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
4727   }
4728   CodeGenFunction CGF(CGM);
4729   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
4730                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
4731 
4732   // *privi = &.privates.privi;
4733   LValue Base = CGF.EmitLoadOfPointerLValue(
4734       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
4735       TaskPrivatesArg.getType()->castAs<PointerType>());
4736   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
4737   Counter = 0;
4738   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
4739     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
4740     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
4741     LValue RefLVal =
4742         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
4743     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
4744         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
4745     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
4746     ++Counter;
4747   }
4748   CGF.FinishFunction();
4749   return TaskPrivatesMap;
4750 }
4751 
4752 /// Emit initialization for private variables in task-based directives.
4753 static void emitPrivatesInit(CodeGenFunction &CGF,
4754                              const OMPExecutableDirective &D,
4755                              Address KmpTaskSharedsPtr, LValue TDBase,
4756                              const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4757                              QualType SharedsTy, QualType SharedsPtrTy,
4758                              const OMPTaskDataTy &Data,
4759                              ArrayRef<PrivateDataTy> Privates, bool ForDup) {
4760   ASTContext &C = CGF.getContext();
4761   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4762   LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
4763   OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
4764                                  ? OMPD_taskloop
4765                                  : OMPD_task;
4766   const CapturedStmt &CS = *D.getCapturedStmt(Kind);
4767   CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
4768   LValue SrcBase;
4769   bool IsTargetTask =
4770       isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
4771       isOpenMPTargetExecutionDirective(D.getDirectiveKind());
4772   // For target-based directives skip 3 firstprivate arrays BasePointersArray,
4773   // PointersArray and SizesArray. The original variables for these arrays are
4774   // not captured and we get their addresses explicitly.
4775   if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
4776       (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
4777     SrcBase = CGF.MakeAddrLValue(
4778         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4779             KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
4780         SharedsTy);
4781   }
4782   FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
4783   for (const PrivateDataTy &Pair : Privates) {
4784     const VarDecl *VD = Pair.second.PrivateCopy;
4785     const Expr *Init = VD->getAnyInitializer();
4786     if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
4787                              !CGF.isTrivialInitializer(Init)))) {
4788       LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
4789       if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
4790         const VarDecl *OriginalVD = Pair.second.Original;
4791         // Check if the variable is the target-based BasePointersArray,
4792         // PointersArray or SizesArray.
4793         LValue SharedRefLValue;
4794         QualType Type = PrivateLValue.getType();
4795         const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
4796         if (IsTargetTask && !SharedField) {
4797           assert(isa<ImplicitParamDecl>(OriginalVD) &&
4798                  isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
4799                  cast<CapturedDecl>(OriginalVD->getDeclContext())
4800                          ->getNumParams() == 0 &&
4801                  isa<TranslationUnitDecl>(
4802                      cast<CapturedDecl>(OriginalVD->getDeclContext())
4803                          ->getDeclContext()) &&
4804                  "Expected artificial target data variable.");
4805           SharedRefLValue =
4806               CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
4807         } else {
4808           SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
4809           SharedRefLValue = CGF.MakeAddrLValue(
4810               Address(SharedRefLValue.getPointer(CGF),
4811                       C.getDeclAlign(OriginalVD)),
4812               SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
4813               SharedRefLValue.getTBAAInfo());
4814         }
4815         if (Type->isArrayType()) {
4816           // Initialize firstprivate array.
4817           if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
4818             // Perform simple memcpy.
4819             CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
4820           } else {
4821             // Initialize firstprivate array using element-by-element
4822             // initialization.
4823             CGF.EmitOMPAggregateAssign(
4824                 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
4825                 Type,
4826                 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
4827                                                   Address SrcElement) {
4828                   // Clean up any temporaries needed by the initialization.
4829                   CodeGenFunction::OMPPrivateScope InitScope(CGF);
4830                   InitScope.addPrivate(
4831                       Elem, [SrcElement]() -> Address { return SrcElement; });
4832                   (void)InitScope.Privatize();
4833                   // Emit initialization for single element.
4834                   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
4835                       CGF, &CapturesInfo);
4836                   CGF.EmitAnyExprToMem(Init, DestElement,
4837                                        Init->getType().getQualifiers(),
4838                                        /*IsInitializer=*/false);
4839                 });
4840           }
4841         } else {
4842           CodeGenFunction::OMPPrivateScope InitScope(CGF);
4843           InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
4844             return SharedRefLValue.getAddress(CGF);
4845           });
4846           (void)InitScope.Privatize();
4847           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
4848           CGF.EmitExprAsInit(Init, VD, PrivateLValue,
4849                              /*capturedByInit=*/false);
4850         }
4851       } else {
4852         CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
4853       }
4854     }
4855     ++FI;
4856   }
4857 }
4858 
4859 /// Check if duplication function is required for taskloops.
4860 static bool checkInitIsRequired(CodeGenFunction &CGF,
4861                                 ArrayRef<PrivateDataTy> Privates) {
4862   bool InitRequired = false;
4863   for (const PrivateDataTy &Pair : Privates) {
4864     const VarDecl *VD = Pair.second.PrivateCopy;
4865     const Expr *Init = VD->getAnyInitializer();
4866     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
4867                                     !CGF.isTrivialInitializer(Init));
4868     if (InitRequired)
4869       break;
4870   }
4871   return InitRequired;
4872 }
4873 
4874 
4875 /// Emit task_dup function (for initialization of
4876 /// private/firstprivate/lastprivate vars and last_iter flag)
4877 /// \code
4878 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
4879 /// lastpriv) {
4880 /// // setup lastprivate flag
4881 ///    task_dst->last = lastpriv;
4882 /// // could be constructor calls here...
4883 /// }
4884 /// \endcode
4885 static llvm::Value *
4886 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
4887                     const OMPExecutableDirective &D,
4888                     QualType KmpTaskTWithPrivatesPtrQTy,
4889                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4890                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
4891                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
4892                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
4893   ASTContext &C = CGM.getContext();
4894   FunctionArgList Args;
4895   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4896                            KmpTaskTWithPrivatesPtrQTy,
4897                            ImplicitParamDecl::Other);
4898   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4899                            KmpTaskTWithPrivatesPtrQTy,
4900                            ImplicitParamDecl::Other);
4901   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
4902                                 ImplicitParamDecl::Other);
4903   Args.push_back(&DstArg);
4904   Args.push_back(&SrcArg);
4905   Args.push_back(&LastprivArg);
4906   const auto &TaskDupFnInfo =
4907       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4908   llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
4909   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
4910   auto *TaskDup = llvm::Function::Create(
4911       TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4912   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
4913   TaskDup->setDoesNotRecurse();
4914   CodeGenFunction CGF(CGM);
4915   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
4916                     Loc);
4917 
4918   LValue TDBase = CGF.EmitLoadOfPointerLValue(
4919       CGF.GetAddrOfLocalVar(&DstArg),
4920       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4921   // task_dst->liter = lastpriv;
4922   if (WithLastIter) {
4923     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4924     LValue Base = CGF.EmitLValueForField(
4925         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4926     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4927     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
4928         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
4929     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
4930   }
4931 
4932   // Emit initial values for private copies (if any).
4933   assert(!Privates.empty());
4934   Address KmpTaskSharedsPtr = Address::invalid();
4935   if (!Data.FirstprivateVars.empty()) {
4936     LValue TDBase = CGF.EmitLoadOfPointerLValue(
4937         CGF.GetAddrOfLocalVar(&SrcArg),
4938         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4939     LValue Base = CGF.EmitLValueForField(
4940         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4941     KmpTaskSharedsPtr = Address(
4942         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
4943                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
4944                                                   KmpTaskTShareds)),
4945                              Loc),
4946         CGF.getNaturalTypeAlignment(SharedsTy));
4947   }
4948   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
4949                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
4950   CGF.FinishFunction();
4951   return TaskDup;
4952 }
4953 
4954 /// Checks if destructor function is required to be generated.
4955 /// \return true if cleanups are required, false otherwise.
4956 static bool
4957 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
4958   bool NeedsCleanup = false;
4959   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4960   const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
4961   for (const FieldDecl *FD : PrivateRD->fields()) {
4962     NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
4963     if (NeedsCleanup)
4964       break;
4965   }
4966   return NeedsCleanup;
4967 }
4968 
4969 CGOpenMPRuntime::TaskResultTy
4970 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
4971                               const OMPExecutableDirective &D,
4972                               llvm::Function *TaskFunction, QualType SharedsTy,
4973                               Address Shareds, const OMPTaskDataTy &Data) {
4974   ASTContext &C = CGM.getContext();
4975   llvm::SmallVector<PrivateDataTy, 4> Privates;
4976   // Aggregate privates and sort them by the alignment.
4977   auto I = Data.PrivateCopies.begin();
4978   for (const Expr *E : Data.PrivateVars) {
4979     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4980     Privates.emplace_back(
4981         C.getDeclAlign(VD),
4982         PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4983                          /*PrivateElemInit=*/nullptr));
4984     ++I;
4985   }
4986   I = Data.FirstprivateCopies.begin();
4987   auto IElemInitRef = Data.FirstprivateInits.begin();
4988   for (const Expr *E : Data.FirstprivateVars) {
4989     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4990     Privates.emplace_back(
4991         C.getDeclAlign(VD),
4992         PrivateHelpersTy(
4993             VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4994             cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
4995     ++I;
4996     ++IElemInitRef;
4997   }
4998   I = Data.LastprivateCopies.begin();
4999   for (const Expr *E : Data.LastprivateVars) {
5000     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
5001     Privates.emplace_back(
5002         C.getDeclAlign(VD),
5003         PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
5004                          /*PrivateElemInit=*/nullptr));
5005     ++I;
5006   }
5007   llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
5008     return L.first > R.first;
5009   });
5010   QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
5011   // Build type kmp_routine_entry_t (if not built yet).
5012   emitKmpRoutineEntryT(KmpInt32Ty);
5013   // Build type kmp_task_t (if not built yet).
5014   if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
5015     if (SavedKmpTaskloopTQTy.isNull()) {
5016       SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
5017           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
5018     }
5019     KmpTaskTQTy = SavedKmpTaskloopTQTy;
5020   } else {
5021     assert((D.getDirectiveKind() == OMPD_task ||
5022             isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
5023             isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
5024            "Expected taskloop, task or target directive");
5025     if (SavedKmpTaskTQTy.isNull()) {
5026       SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
5027           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
5028     }
5029     KmpTaskTQTy = SavedKmpTaskTQTy;
5030   }
5031   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
5032   // Build particular struct kmp_task_t for the given task.
5033   const RecordDecl *KmpTaskTWithPrivatesQTyRD =
5034       createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
5035   QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
5036   QualType KmpTaskTWithPrivatesPtrQTy =
5037       C.getPointerType(KmpTaskTWithPrivatesQTy);
5038   llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
5039   llvm::Type *KmpTaskTWithPrivatesPtrTy =
5040       KmpTaskTWithPrivatesTy->getPointerTo();
5041   llvm::Value *KmpTaskTWithPrivatesTySize =
5042       CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
5043   QualType SharedsPtrTy = C.getPointerType(SharedsTy);
5044 
5045   // Emit initial values for private copies (if any).
5046   llvm::Value *TaskPrivatesMap = nullptr;
5047   llvm::Type *TaskPrivatesMapTy =
5048       std::next(TaskFunction->arg_begin(), 3)->getType();
5049   if (!Privates.empty()) {
5050     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
5051     TaskPrivatesMap = emitTaskPrivateMappingFunction(
5052         CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
5053         FI->getType(), Privates);
5054     TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5055         TaskPrivatesMap, TaskPrivatesMapTy);
5056   } else {
5057     TaskPrivatesMap = llvm::ConstantPointerNull::get(
5058         cast<llvm::PointerType>(TaskPrivatesMapTy));
5059   }
5060   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
5061   // kmp_task_t *tt);
5062   llvm::Function *TaskEntry = emitProxyTaskFunction(
5063       CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
5064       KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
5065       TaskPrivatesMap);
5066 
5067   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
5068   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
5069   // kmp_routine_entry_t *task_entry);
5070   // Task flags. Format is taken from
5071   // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
5072   // description of kmp_tasking_flags struct.
5073   enum {
5074     TiedFlag = 0x1,
5075     FinalFlag = 0x2,
5076     DestructorsFlag = 0x8,
5077     PriorityFlag = 0x20
5078   };
5079   unsigned Flags = Data.Tied ? TiedFlag : 0;
5080   bool NeedsCleanup = false;
5081   if (!Privates.empty()) {
5082     NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
5083     if (NeedsCleanup)
5084       Flags = Flags | DestructorsFlag;
5085   }
5086   if (Data.Priority.getInt())
5087     Flags = Flags | PriorityFlag;
5088   llvm::Value *TaskFlags =
5089       Data.Final.getPointer()
5090           ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
5091                                      CGF.Builder.getInt32(FinalFlag),
5092                                      CGF.Builder.getInt32(/*C=*/0))
5093           : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
5094   TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
5095   llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
5096   SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
5097       getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
5098       SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5099           TaskEntry, KmpRoutineEntryPtrTy)};
5100   llvm::Value *NewTask;
5101   if (D.hasClausesOfKind<OMPNowaitClause>()) {
5102     // Check if we have any device clause associated with the directive.
5103     const Expr *Device = nullptr;
5104     if (auto *C = D.getSingleClause<OMPDeviceClause>())
5105       Device = C->getDevice();
5106     // Emit device ID if any otherwise use default value.
5107     llvm::Value *DeviceID;
5108     if (Device)
5109       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
5110                                            CGF.Int64Ty, /*isSigned=*/true);
5111     else
5112       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
5113     AllocArgs.push_back(DeviceID);
5114     NewTask = CGF.EmitRuntimeCall(
5115       createRuntimeFunction(OMPRTL__kmpc_omp_target_task_alloc), AllocArgs);
5116   } else {
5117     NewTask = CGF.EmitRuntimeCall(
5118       createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
5119   }
5120   llvm::Value *NewTaskNewTaskTTy =
5121       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5122           NewTask, KmpTaskTWithPrivatesPtrTy);
5123   LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
5124                                                KmpTaskTWithPrivatesQTy);
5125   LValue TDBase =
5126       CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
5127   // Fill the data in the resulting kmp_task_t record.
5128   // Copy shareds if there are any.
5129   Address KmpTaskSharedsPtr = Address::invalid();
5130   if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
5131     KmpTaskSharedsPtr =
5132         Address(CGF.EmitLoadOfScalar(
5133                     CGF.EmitLValueForField(
5134                         TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
5135                                            KmpTaskTShareds)),
5136                     Loc),
5137                 CGF.getNaturalTypeAlignment(SharedsTy));
5138     LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
5139     LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
5140     CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
5141   }
5142   // Emit initial values for private copies (if any).
5143   TaskResultTy Result;
5144   if (!Privates.empty()) {
5145     emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
5146                      SharedsTy, SharedsPtrTy, Data, Privates,
5147                      /*ForDup=*/false);
5148     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
5149         (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
5150       Result.TaskDupFn = emitTaskDupFunction(
5151           CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
5152           KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
5153           /*WithLastIter=*/!Data.LastprivateVars.empty());
5154     }
5155   }
5156   // Fields of union "kmp_cmplrdata_t" for destructors and priority.
5157   enum { Priority = 0, Destructors = 1 };
5158   // Provide pointer to function with destructors for privates.
5159   auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
5160   const RecordDecl *KmpCmplrdataUD =
5161       (*FI)->getType()->getAsUnionType()->getDecl();
5162   if (NeedsCleanup) {
5163     llvm::Value *DestructorFn = emitDestructorsFunction(
5164         CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
5165         KmpTaskTWithPrivatesQTy);
5166     LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
5167     LValue DestructorsLV = CGF.EmitLValueForField(
5168         Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
5169     CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5170                               DestructorFn, KmpRoutineEntryPtrTy),
5171                           DestructorsLV);
5172   }
5173   // Set priority.
5174   if (Data.Priority.getInt()) {
5175     LValue Data2LV = CGF.EmitLValueForField(
5176         TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
5177     LValue PriorityLV = CGF.EmitLValueForField(
5178         Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
5179     CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
5180   }
5181   Result.NewTask = NewTask;
5182   Result.TaskEntry = TaskEntry;
5183   Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
5184   Result.TDBase = TDBase;
5185   Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
5186   return Result;
5187 }
5188 
namespace {
/// Dependence kinds in the encoding passed to the runtime library.
enum RTLDependenceKindTy {
  /// Runtime kind for 'in' dependences.
  DepIn = 0x1,
  /// Runtime kind shared by 'out' and 'inout' dependences.
  DepInOut = 0x3,
  /// Runtime kind for 'mutexinoutset' dependences.
  DepMutexInOutSet = 0x4
};
/// Indices of the fields of the kmp_depend_info record, in declaration order.
enum RTLDependInfoFieldsTy {
  BaseAddr = 0,
  Len = 1,
  Flags = 2
};
} // namespace
5199 
5200 /// Translates internal dependency kind into the runtime kind.
5201 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
5202   RTLDependenceKindTy DepKind;
5203   switch (K) {
5204   case OMPC_DEPEND_in:
5205     DepKind = DepIn;
5206     break;
5207   // Out and InOut dependencies must use the same code.
5208   case OMPC_DEPEND_out:
5209   case OMPC_DEPEND_inout:
5210     DepKind = DepInOut;
5211     break;
5212   case OMPC_DEPEND_mutexinoutset:
5213     DepKind = DepMutexInOutSet;
5214     break;
5215   case OMPC_DEPEND_source:
5216   case OMPC_DEPEND_sink:
5217   case OMPC_DEPEND_depobj:
5218   case OMPC_DEPEND_unknown:
5219     llvm_unreachable("Unknown task dependence type");
5220   }
5221   return DepKind;
5222 }
5223 
5224 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
5225 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
5226                            QualType &FlagsTy) {
5227   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
5228   if (KmpDependInfoTy.isNull()) {
5229     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
5230     KmpDependInfoRD->startDefinition();
5231     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
5232     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
5233     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
5234     KmpDependInfoRD->completeDefinition();
5235     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
5236   }
5237 }
5238 
5239 std::pair<llvm::Value *, LValue>
5240 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
5241                                    SourceLocation Loc) {
5242   ASTContext &C = CGM.getContext();
5243   QualType FlagsTy;
5244   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5245   RecordDecl *KmpDependInfoRD =
5246       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5247   LValue Base = CGF.EmitLoadOfPointerLValue(
5248       DepobjLVal.getAddress(CGF),
5249       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5250   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5251   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5252           Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
5253   Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
5254                             Base.getTBAAInfo());
5255   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5256       Addr.getPointer(),
5257       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5258   LValue NumDepsBase = CGF.MakeAddrLValue(
5259       Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
5260       Base.getBaseInfo(), Base.getTBAAInfo());
5261   // NumDeps = deps[i].base_addr;
5262   LValue BaseAddrLVal = CGF.EmitLValueForField(
5263       NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
5264   llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
5265   return std::make_pair(NumDeps, Base);
5266 }
5267 
// Emits an array of kmp_depend_info records describing \p Dependencies.
//
// Three storage strategies are used:
//  * \p ForDepobj: the array is allocated with __kmpc_alloc and gets one extra
//    leading record whose base_addr field holds the number of dependencies;
//  * at least one dependency has the 'depobj' kind: the array is a local
//    variable-length array sized at run time, and the records of every
//    referenced depobj are copied after the regular records;
//  * otherwise: the array is a fixed-size temporary.
//
// Returns the number of elements (a 32-bit integer value) together with the
// address of the first real record cast to void*. If \p Dependencies is empty
// the result is (nullptr, Address::invalid()).
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF,
    ArrayRef<std::pair<OpenMPDependClauseKind, const Expr *>> Dependencies,
    bool ForDepobj, SourceLocation Loc) {
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.size();
  llvm::Value *NumOfElements = nullptr;
  if (NumDependencies) {
    QualType FlagsTy;
    getDependTypes(C, KmpDependInfoTy, FlagsTy);
    RecordDecl *KmpDependInfoRD =
        cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
    llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
    unsigned NumDepobjDependecies = 0;
    // For every depobj dependency: (number of records, lvalue of its records).
    SmallVector<std::pair<llvm::Value *, LValue>, 4> Depobjs;
    // Run-time sum of the record counts of all referenced depobj objects.
    llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
    // Calculate number of depobj dependencies.
    for (const std::pair<OpenMPDependClauseKind, const Expr *> &Pair :
         Dependencies) {
      if (Pair.first != OMPC_DEPEND_depobj)
        continue;
      LValue DepobjLVal = CGF.EmitLValue(Pair.second);
      llvm::Value *NumDeps;
      LValue Base;
      std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
      NumOfDepobjElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumDeps);
      Depobjs.emplace_back(NumDeps, Base);
      ++NumDepobjDependecies;
    }

    QualType KmpDependInfoArrayTy;
    // Define type kmp_depend_info[<Dependencies.size()>];
    // For depobj reserve one extra element to store the number of elements.
    // It is required to handle depobj(x) update(in) construct.
    // kmp_depend_info[<Dependencies.size()>] deps;
    if (ForDepobj) {
      assert(NumDepobjDependecies == 0 &&
             "depobj dependency kind is not expected in depobj directive.");
      KmpDependInfoArrayTy = C.getConstantArrayType(
          KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
          nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
      // Need to allocate on the dynamic memory.
      llvm::Value *ThreadID = getThreadID(CGF, Loc);
      // Use default allocator.
      llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
      CharUnits Align = C.getTypeAlignInChars(KmpDependInfoArrayTy);
      CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
      // Allocation size is the array size rounded up to its alignment.
      llvm::Value *Size = CGF.CGM.getSize(Sz.alignTo(Align));
      llvm::Value *Args[] = {ThreadID, Size, Allocator};

      llvm::Value *Addr = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_alloc), Args, ".dep.arr.addr");
      Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Addr, CGF.ConvertTypeForMem(KmpDependInfoArrayTy)->getPointerTo());
      DependenciesArray = Address(Addr, Align);
      // The reported count excludes the extra leading record.
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                             /*isSigned=*/false);
    } else if (NumDepobjDependecies > 0) {
      // Total = records coming from depobj objects + regular dependencies.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          NumOfDepobjElements,
          llvm::ConstantInt::get(CGM.IntPtrTy,
                                 NumDependencies - NumDepobjDependecies,
                                 /*isSigned=*/false));
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
      // NOTE(review): NumOfElements has already been narrowed to 32 bits here,
      // while the opaque size expression below is declared with a 64-bit
      // unsigned type - confirm the VLA size emission tolerates the mismatch.
      OpaqueValueExpr OVE(
          Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
          VK_RValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                    RValue::get(NumOfElements));
      KmpDependInfoArrayTy =
          C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    } else {
      // Only regular dependencies: a fixed-size temporary is enough.
      KmpDependInfoArrayTy = C.getConstantArrayType(
          KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
          nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
      DependenciesArray =
          CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                             /*isSigned=*/false);
    }
    if (ForDepobj) {
      // Write number of elements in the first element of array for depobj.
      llvm::Value *NumVal =
          llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
      LValue Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0),
          KmpDependInfoTy);
      // deps[0].base_addr = NumDependencies;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      CGF.EmitStoreOfScalar(NumVal, BaseAddrLVal);
    }
    // Index of the next record to fill; for depobj the record 0 is the header
    // written above.
    unsigned Pos = ForDepobj ? 1 : 0;
    for (unsigned I = 0; I < NumDependencies; ++I) {
      // Records of depobj dependencies are copied in bulk after this loop.
      if (Dependencies[I].first == OMPC_DEPEND_depobj)
        continue;
      const Expr *E = Dependencies[I].second;
      LValue Addr = CGF.EmitLValue(E);
      llvm::Value *Size;
      QualType Ty = E->getType();
      if (const auto *ASE =
              dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
        // Array section: size = (address one element past the upper bound) -
        // (address of the lower bound).
        LValue UpAddrLVal =
            CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
        llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
            UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
        llvm::Value *LowIntPtr =
            CGF.Builder.CreatePtrToInt(Addr.getPointer(CGF), CGM.SizeTy);
        llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
        Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
      } else {
        Size = CGF.getTypeSize(Ty);
      }
      LValue Base;
      // The variable-length array created for the depobj case is indexed with
      // an element GEP, the constant-size arrays with an array GEP
      // (presumably because the VLA address already has the element type -
      // TODO confirm).
      if (NumDepobjDependecies > 0) {
        Base = CGF.MakeAddrLValue(
            CGF.Builder.CreateConstGEP(DependenciesArray, Pos),
            KmpDependInfoTy);
      } else {
        Base = CGF.MakeAddrLValue(
            CGF.Builder.CreateConstArrayGEP(DependenciesArray, Pos),
            KmpDependInfoTy);
      }
      // deps[i].base_addr = &<Dependencies[i].second>;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      CGF.EmitStoreOfScalar(
          CGF.Builder.CreatePtrToInt(Addr.getPointer(CGF), CGF.IntPtrTy),
          BaseAddrLVal);
      // deps[i].len = sizeof(<Dependencies[i].second>);
      LValue LenLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Len));
      CGF.EmitStoreOfScalar(Size, LenLVal);
      // deps[i].flags = <Dependencies[i].first>;
      RTLDependenceKindTy DepKind =
          translateDependencyKind(Dependencies[I].first);
      LValue FlagsLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                            FlagsLVal);
      ++Pos;
    }
    // Copy final depobj arrays.
    if (NumDepobjDependecies > 0) {
      llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
      // Destination cursor: starts right after the regular records.
      Address Addr = CGF.Builder.CreateConstGEP(DependenciesArray, Pos);
      for (const std::pair<llvm::Value *, LValue> &Pair : Depobjs) {
        // Byte size of this depobj's records = element size * record count.
        llvm::Value *Size = CGF.Builder.CreateNUWMul(ElSize, Pair.first);
        CGF.Builder.CreateMemCpy(Addr, Pair.second.getAddress(CGF), Size);
        // Advance the cursor by the number of records just copied.
        Addr =
            Address(CGF.Builder.CreateGEP(
                        Addr.getElementType(), Addr.getPointer(), Pair.first),
                    DependenciesArray.getAlignment().alignmentOfArrayElement(
                        C.getTypeSizeInChars(KmpDependInfoTy)));
      }
      DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          DependenciesArray, CGF.VoidPtrTy);
    } else {
      // For depobj the returned address skips the leading header record.
      DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DependenciesArray, ForDepobj ? 1 : 0),
          CGF.VoidPtrTy);
    }
  }
  return std::make_pair(NumOfElements, DependenciesArray);
}
5444 
5445 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
5446                                         SourceLocation Loc) {
5447   ASTContext &C = CGM.getContext();
5448   QualType FlagsTy;
5449   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5450   LValue Base = CGF.EmitLoadOfPointerLValue(
5451       DepobjLVal.getAddress(CGF),
5452       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5453   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5454   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5455       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
5456   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5457       Addr.getPointer(),
5458       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5459   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
5460                                                                CGF.VoidPtrTy);
5461   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5462   // Use default allocator.
5463   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5464   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5465 
5466   // _kmpc_free(gtid, addr, nullptr);
5467   (void)CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_free), Args);
5468 }
5469 
5470 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
5471                                        OpenMPDependClauseKind NewDepKind,
5472                                        SourceLocation Loc) {
5473   ASTContext &C = CGM.getContext();
5474   QualType FlagsTy;
5475   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5476   RecordDecl *KmpDependInfoRD =
5477       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5478   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5479   llvm::Value *NumDeps;
5480   LValue Base;
5481   std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
5482 
5483   Address Begin = Base.getAddress(CGF);
5484   // Cast from pointer to array type to pointer to single element.
5485   llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
5486   // The basic structure here is a while-do loop.
5487   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
5488   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
5489   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5490   CGF.EmitBlock(BodyBB);
5491   llvm::PHINode *ElementPHI =
5492       CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
5493   ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
5494   Begin = Address(ElementPHI, Begin.getAlignment());
5495   Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
5496                             Base.getTBAAInfo());
5497   // deps[i].flags = NewDepKind;
5498   RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
5499   LValue FlagsLVal = CGF.EmitLValueForField(
5500       Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5501   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5502                         FlagsLVal);
5503 
5504   // Shift the address forward by one element.
5505   Address ElementNext =
5506       CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
5507   ElementPHI->addIncoming(ElementNext.getPointer(),
5508                           CGF.Builder.GetInsertBlock());
5509   llvm::Value *IsEmpty =
5510       CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
5511   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5512   // Done.
5513   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5514 }
5515 
5516 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
5517                                    const OMPExecutableDirective &D,
5518                                    llvm::Function *TaskFunction,
5519                                    QualType SharedsTy, Address Shareds,
5520                                    const Expr *IfCond,
5521                                    const OMPTaskDataTy &Data) {
5522   if (!CGF.HaveInsertPoint())
5523     return;
5524 
5525   TaskResultTy Result =
5526       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5527   llvm::Value *NewTask = Result.NewTask;
5528   llvm::Function *TaskEntry = Result.TaskEntry;
5529   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
5530   LValue TDBase = Result.TDBase;
5531   const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
5532   // Process list of dependences.
5533   Address DependenciesArray = Address::invalid();
5534   llvm::Value *NumOfElements;
5535   std::tie(NumOfElements, DependenciesArray) =
5536       emitDependClause(CGF, Data.Dependences, /*ForDepobj=*/false, Loc);
5537 
5538   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5539   // libcall.
5540   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5541   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5542   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
5543   // list is not empty
5544   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5545   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5546   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5547   llvm::Value *DepTaskArgs[7];
5548   if (!Data.Dependences.empty()) {
5549     DepTaskArgs[0] = UpLoc;
5550     DepTaskArgs[1] = ThreadID;
5551     DepTaskArgs[2] = NewTask;
5552     DepTaskArgs[3] = NumOfElements;
5553     DepTaskArgs[4] = DependenciesArray.getPointer();
5554     DepTaskArgs[5] = CGF.Builder.getInt32(0);
5555     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5556   }
5557   auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
5558                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5559     if (!Data.Tied) {
5560       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5561       LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5562       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5563     }
5564     if (!Data.Dependences.empty()) {
5565       CGF.EmitRuntimeCall(
5566           createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
5567     } else {
5568       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
5569                           TaskArgs);
5570     }
5571     // Check if parent region is untied and build return for untied task;
5572     if (auto *Region =
5573             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5574       Region->emitUntiedSwitch(CGF);
5575   };
5576 
5577   llvm::Value *DepWaitTaskArgs[6];
5578   if (!Data.Dependences.empty()) {
5579     DepWaitTaskArgs[0] = UpLoc;
5580     DepWaitTaskArgs[1] = ThreadID;
5581     DepWaitTaskArgs[2] = NumOfElements;
5582     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5583     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5584     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5585   }
5586   auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
5587                         &Data, &DepWaitTaskArgs,
5588                         Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5589     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5590     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5591     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5592     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5593     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5594     // is specified.
5595     if (!Data.Dependences.empty())
5596       CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
5597                           DepWaitTaskArgs);
5598     // Call proxy_task_entry(gtid, new_task);
5599     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
5600                       Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
5601       Action.Enter(CGF);
5602       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
5603       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
5604                                                           OutlinedFnArgs);
5605     };
5606 
5607     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
5608     // kmp_task_t *new_task);
5609     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
5610     // kmp_task_t *new_task);
5611     RegionCodeGenTy RCG(CodeGen);
5612     CommonActionTy Action(
5613         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
5614         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
5615     RCG.setAction(Action);
5616     RCG(CGF);
5617   };
5618 
5619   if (IfCond) {
5620     emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
5621   } else {
5622     RegionCodeGenTy ThenRCG(ThenCodeGen);
5623     ThenRCG(CGF);
5624   }
5625 }
5626 
5627 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
5628                                        const OMPLoopDirective &D,
5629                                        llvm::Function *TaskFunction,
5630                                        QualType SharedsTy, Address Shareds,
5631                                        const Expr *IfCond,
5632                                        const OMPTaskDataTy &Data) {
5633   if (!CGF.HaveInsertPoint())
5634     return;
5635   TaskResultTy Result =
5636       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5637   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5638   // libcall.
5639   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
5640   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
5641   // sched, kmp_uint64 grainsize, void *task_dup);
5642   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5643   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5644   llvm::Value *IfVal;
5645   if (IfCond) {
5646     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
5647                                       /*isSigned=*/true);
5648   } else {
5649     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
5650   }
5651 
5652   LValue LBLVal = CGF.EmitLValueForField(
5653       Result.TDBase,
5654       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
5655   const auto *LBVar =
5656       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
5657   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
5658                        LBLVal.getQuals(),
5659                        /*IsInitializer=*/true);
5660   LValue UBLVal = CGF.EmitLValueForField(
5661       Result.TDBase,
5662       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
5663   const auto *UBVar =
5664       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
5665   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
5666                        UBLVal.getQuals(),
5667                        /*IsInitializer=*/true);
5668   LValue StLVal = CGF.EmitLValueForField(
5669       Result.TDBase,
5670       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
5671   const auto *StVar =
5672       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
5673   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
5674                        StLVal.getQuals(),
5675                        /*IsInitializer=*/true);
5676   // Store reductions address.
5677   LValue RedLVal = CGF.EmitLValueForField(
5678       Result.TDBase,
5679       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5680   if (Data.Reductions) {
5681     CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5682   } else {
5683     CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
5684                                CGF.getContext().VoidPtrTy);
5685   }
5686   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
5687   llvm::Value *TaskArgs[] = {
5688       UpLoc,
5689       ThreadID,
5690       Result.NewTask,
5691       IfVal,
5692       LBLVal.getPointer(CGF),
5693       UBLVal.getPointer(CGF),
5694       CGF.EmitLoadOfScalar(StLVal, Loc),
5695       llvm::ConstantInt::getSigned(
5696           CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
5697       llvm::ConstantInt::getSigned(
5698           CGF.IntTy, Data.Schedule.getPointer()
5699                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
5700                          : NoSchedule),
5701       Data.Schedule.getPointer()
5702           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
5703                                       /*isSigned=*/false)
5704           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
5705       Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5706                              Result.TaskDupFn, CGF.VoidPtrTy)
5707                        : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
5708   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
5709 }
5710 
5711 /// Emit reduction operation for each element of array (required for
5712 /// array sections) LHS op = RHS.
5713 /// \param Type Type of array.
5714 /// \param LHSVar Variable on the left side of the reduction operation
5715 /// (references element of array in original variable).
5716 /// \param RHSVar Variable on the right side of the reduction operation
5717 /// (references element of array in original variable).
5718 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5719 /// RHSVar.
5720 static void EmitOMPAggregateReduction(
5721     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
5722     const VarDecl *RHSVar,
5723     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
5724                                   const Expr *, const Expr *)> &RedOpGen,
5725     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
5726     const Expr *UpExpr = nullptr) {
5727   // Perform element-by-element initialization.
5728   QualType ElementTy;
5729   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
5730   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
5731 
5732   // Drill down to the base element type on both arrays.
5733   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
5734   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
5735 
5736   llvm::Value *RHSBegin = RHSAddr.getPointer();
5737   llvm::Value *LHSBegin = LHSAddr.getPointer();
5738   // Cast from pointer to array type to pointer to single element.
5739   llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
5740   // The basic structure here is a while-do loop.
5741   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
5742   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
5743   llvm::Value *IsEmpty =
5744       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
5745   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5746 
5747   // Enter the loop body, making that address the current address.
5748   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5749   CGF.EmitBlock(BodyBB);
5750 
5751   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
5752 
5753   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
5754       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
5755   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
5756   Address RHSElementCurrent =
5757       Address(RHSElementPHI,
5758               RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5759 
5760   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
5761       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
5762   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
5763   Address LHSElementCurrent =
5764       Address(LHSElementPHI,
5765               LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5766 
5767   // Emit copy.
5768   CodeGenFunction::OMPPrivateScope Scope(CGF);
5769   Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
5770   Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
5771   Scope.Privatize();
5772   RedOpGen(CGF, XExpr, EExpr, UpExpr);
5773   Scope.ForceCleanup();
5774 
5775   // Shift the address forward by one element.
5776   llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
5777       LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
5778   llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
5779       RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
5780   // Check whether we've reached the end.
5781   llvm::Value *Done =
5782       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
5783   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
5784   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
5785   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
5786 
5787   // Done.
5788   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5789 }
5790 
5791 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5792 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5793 /// UDR combiner function.
5794 static void emitReductionCombiner(CodeGenFunction &CGF,
5795                                   const Expr *ReductionOp) {
5796   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5797     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5798       if (const auto *DRE =
5799               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5800         if (const auto *DRD =
5801                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5802           std::pair<llvm::Function *, llvm::Function *> Reduction =
5803               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5804           RValue Func = RValue::get(Reduction.first);
5805           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5806           CGF.EmitIgnoredExpr(ReductionOp);
5807           return;
5808         }
5809   CGF.EmitIgnoredExpr(ReductionOp);
5810 }
5811 
5812 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
5813     SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
5814     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
5815     ArrayRef<const Expr *> ReductionOps) {
5816   ASTContext &C = CGM.getContext();
5817 
5818   // void reduction_func(void *LHSArg, void *RHSArg);
5819   FunctionArgList Args;
5820   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5821                            ImplicitParamDecl::Other);
5822   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5823                            ImplicitParamDecl::Other);
5824   Args.push_back(&LHSArg);
5825   Args.push_back(&RHSArg);
5826   const auto &CGFI =
5827       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5828   std::string Name = getName({"omp", "reduction", "reduction_func"});
5829   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
5830                                     llvm::GlobalValue::InternalLinkage, Name,
5831                                     &CGM.getModule());
5832   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
5833   Fn->setDoesNotRecurse();
5834   CodeGenFunction CGF(CGM);
5835   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5836 
5837   // Dst = (void*[n])(LHSArg);
5838   // Src = (void*[n])(RHSArg);
5839   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5840       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
5841       ArgsType), CGF.getPointerAlign());
5842   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5843       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
5844       ArgsType), CGF.getPointerAlign());
5845 
5846   //  ...
5847   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5848   //  ...
5849   CodeGenFunction::OMPPrivateScope Scope(CGF);
5850   auto IPriv = Privates.begin();
5851   unsigned Idx = 0;
5852   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5853     const auto *RHSVar =
5854         cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5855     Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
5856       return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
5857     });
5858     const auto *LHSVar =
5859         cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5860     Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
5861       return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
5862     });
5863     QualType PrivTy = (*IPriv)->getType();
5864     if (PrivTy->isVariablyModifiedType()) {
5865       // Get array size and emit VLA type.
5866       ++Idx;
5867       Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5868       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5869       const VariableArrayType *VLA =
5870           CGF.getContext().getAsVariableArrayType(PrivTy);
5871       const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5872       CodeGenFunction::OpaqueValueMapping OpaqueMap(
5873           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5874       CGF.EmitVariablyModifiedType(PrivTy);
5875     }
5876   }
5877   Scope.Privatize();
5878   IPriv = Privates.begin();
5879   auto ILHS = LHSExprs.begin();
5880   auto IRHS = RHSExprs.begin();
5881   for (const Expr *E : ReductionOps) {
5882     if ((*IPriv)->getType()->isArrayType()) {
5883       // Emit reduction for array section.
5884       const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5885       const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5886       EmitOMPAggregateReduction(
5887           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5888           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5889             emitReductionCombiner(CGF, E);
5890           });
5891     } else {
5892       // Emit reduction for array subscript or single variable.
5893       emitReductionCombiner(CGF, E);
5894     }
5895     ++IPriv;
5896     ++ILHS;
5897     ++IRHS;
5898   }
5899   Scope.ForceCleanup();
5900   CGF.FinishFunction();
5901   return Fn;
5902 }
5903 
5904 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5905                                                   const Expr *ReductionOp,
5906                                                   const Expr *PrivateRef,
5907                                                   const DeclRefExpr *LHS,
5908                                                   const DeclRefExpr *RHS) {
5909   if (PrivateRef->getType()->isArrayType()) {
5910     // Emit reduction for array section.
5911     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5912     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5913     EmitOMPAggregateReduction(
5914         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5915         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5916           emitReductionCombiner(CGF, ReductionOp);
5917         });
5918   } else {
5919     // Emit reduction for array subscript or single variable.
5920     emitReductionCombiner(CGF, ReductionOp);
5921   }
5922 }
5923 
/// Emits the code that combines the private copies of the reduction items
/// into the original items.
///
/// \param CGF Function in which the code is emitted; nothing is emitted when
/// it has no insert point.
/// \param Loc Source location used for the runtime calls and atomic updates.
/// \param Privates Private copies of the reduction items. Their types decide
/// whether an item is combined as an array and whether an extra array-size
/// slot is reserved in the reduction list.
/// \param LHSExprs References (cast to DeclRefExpr) to the variables used as
/// left-hand operands of the combiners.
/// \param RHSExprs References to the variables used as right-hand operands;
/// their addresses populate the reduction list.
/// \param ReductionOps Combiner expressions, one per reduction item.
/// \param Options WithNowait selects the *_nowait runtime entry points;
/// SimpleReduction requests only the inline combiners, with no runtime calls.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // Inline combiners only; the three expression lists are walked in
    // lockstep with ReductionOps.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  // Idx is the slot in the reduction list. It runs ahead of I because every
  // variably modified item occupies two slots: its address and its size.
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      // Note: this local shadows the slot count 'Size' declared above. The
      // element count is stored in the list encoded as a pointer value.
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  // <n> is the number of reduction items (RHSExprs.size()), not the number
  // of list slots; sizeof(RedList) accounts for the extra size slots.
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
                                       : OMPRTL__kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  // Body of case 1: the same non-atomic combiners as the SimpleReduction path.
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
                                       : OMPRTL__kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      // Decompose the combiner 'x = <update>' into the updated location
      // (XExpr) and the update expression (UpExpr). Both stay null when the
      // combiner is not a plain assignment.
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      // BO keeps the value BO_Comma unless a binary operator is found in the
      // update expression below. The pointer named BO in the following 'if'
      // shadows this variable inside that statement only.
      BinaryOperatorKind BO = BO_Comma;
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              // Callback that evaluates UpExpr for a given value of X: the
              // LHS variable is privatized to a temporary holding XRValue
              // before UpExpr is emitted.
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  // Without 'nowait' the atomic case is also followed by __kmpc_end_reduce;
  // with 'nowait' no end call is attached to this case.
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          createRuntimeFunction(OMPRTL__kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
6227 
6228 /// Generates unique name for artificial threadprivate variables.
6229 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
6230 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
6231                                       const Expr *Ref) {
6232   SmallString<256> Buffer;
6233   llvm::raw_svector_ostream Out(Buffer);
6234   const clang::DeclRefExpr *DE;
6235   const VarDecl *D = ::getBaseDecl(Ref, DE);
6236   if (!D)
6237     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
6238   D = D->getCanonicalDecl();
6239   std::string Name = CGM.getOpenMPRuntime().getName(
6240       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
6241   Out << Prefix << Name << "_"
6242       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
6243   return std::string(Out.str());
6244 }
6245 
6246 /// Emits reduction initializer function:
6247 /// \code
6248 /// void @.red_init(void* %arg) {
6249 /// %0 = bitcast void* %arg to <type>*
6250 /// store <type> <init>, <type>* %0
6251 /// ret void
6252 /// }
6253 /// \endcode
6254 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
6255                                            SourceLocation Loc,
6256                                            ReductionCodeGen &RCG, unsigned N) {
6257   ASTContext &C = CGM.getContext();
6258   FunctionArgList Args;
6259   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6260                           ImplicitParamDecl::Other);
6261   Args.emplace_back(&Param);
6262   const auto &FnInfo =
6263       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6264   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6265   std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
6266   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6267                                     Name, &CGM.getModule());
6268   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6269   Fn->setDoesNotRecurse();
6270   CodeGenFunction CGF(CGM);
6271   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6272   Address PrivateAddr = CGF.EmitLoadOfPointer(
6273       CGF.GetAddrOfLocalVar(&Param),
6274       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6275   llvm::Value *Size = nullptr;
6276   // If the size of the reduction item is non-constant, load it from global
6277   // threadprivate variable.
6278   if (RCG.getSizes(N).second) {
6279     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6280         CGF, CGM.getContext().getSizeType(),
6281         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6282     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6283                                 CGM.getContext().getSizeType(), Loc);
6284   }
6285   RCG.emitAggregateType(CGF, N, Size);
6286   LValue SharedLVal;
6287   // If initializer uses initializer from declare reduction construct, emit a
6288   // pointer to the address of the original reduction item (reuired by reduction
6289   // initializer)
6290   if (RCG.usesReductionInitializer(N)) {
6291     Address SharedAddr =
6292         CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6293             CGF, CGM.getContext().VoidPtrTy,
6294             generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
6295     SharedAddr = CGF.EmitLoadOfPointer(
6296         SharedAddr,
6297         CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
6298     SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
6299   } else {
6300     SharedLVal = CGF.MakeNaturalAlignAddrLValue(
6301         llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
6302         CGM.getContext().VoidPtrTy);
6303   }
6304   // Emit the initializer:
6305   // %0 = bitcast void* %arg to <type>*
6306   // store <type> <init>, <type>* %0
6307   RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
6308                          [](CodeGenFunction &) { return false; });
6309   CGF.FinishFunction();
6310   return Fn;
6311 }
6312 
6313 /// Emits reduction combiner function:
6314 /// \code
6315 /// void @.red_comb(void* %arg0, void* %arg1) {
6316 /// %lhs = bitcast void* %arg0 to <type>*
6317 /// %rhs = bitcast void* %arg1 to <type>*
6318 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
6319 /// store <type> %2, <type>* %lhs
6320 /// ret void
6321 /// }
6322 /// \endcode
6323 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
6324                                            SourceLocation Loc,
6325                                            ReductionCodeGen &RCG, unsigned N,
6326                                            const Expr *ReductionOp,
6327                                            const Expr *LHS, const Expr *RHS,
6328                                            const Expr *PrivateRef) {
6329   ASTContext &C = CGM.getContext();
6330   const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
6331   const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
6332   FunctionArgList Args;
6333   ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
6334                                C.VoidPtrTy, ImplicitParamDecl::Other);
6335   ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6336                             ImplicitParamDecl::Other);
6337   Args.emplace_back(&ParamInOut);
6338   Args.emplace_back(&ParamIn);
6339   const auto &FnInfo =
6340       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6341   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6342   std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
6343   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6344                                     Name, &CGM.getModule());
6345   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6346   Fn->setDoesNotRecurse();
6347   CodeGenFunction CGF(CGM);
6348   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6349   llvm::Value *Size = nullptr;
6350   // If the size of the reduction item is non-constant, load it from global
6351   // threadprivate variable.
6352   if (RCG.getSizes(N).second) {
6353     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6354         CGF, CGM.getContext().getSizeType(),
6355         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6356     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6357                                 CGM.getContext().getSizeType(), Loc);
6358   }
6359   RCG.emitAggregateType(CGF, N, Size);
6360   // Remap lhs and rhs variables to the addresses of the function arguments.
6361   // %lhs = bitcast void* %arg0 to <type>*
6362   // %rhs = bitcast void* %arg1 to <type>*
6363   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6364   PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
6365     // Pull out the pointer to the variable.
6366     Address PtrAddr = CGF.EmitLoadOfPointer(
6367         CGF.GetAddrOfLocalVar(&ParamInOut),
6368         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6369     return CGF.Builder.CreateElementBitCast(
6370         PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
6371   });
6372   PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
6373     // Pull out the pointer to the variable.
6374     Address PtrAddr = CGF.EmitLoadOfPointer(
6375         CGF.GetAddrOfLocalVar(&ParamIn),
6376         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6377     return CGF.Builder.CreateElementBitCast(
6378         PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
6379   });
6380   PrivateScope.Privatize();
6381   // Emit the combiner body:
6382   // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
6383   // store <type> %2, <type>* %lhs
6384   CGM.getOpenMPRuntime().emitSingleReductionCombiner(
6385       CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
6386       cast<DeclRefExpr>(RHS));
6387   CGF.FinishFunction();
6388   return Fn;
6389 }
6390 
6391 /// Emits reduction finalizer function:
6392 /// \code
6393 /// void @.red_fini(void* %arg) {
6394 /// %0 = bitcast void* %arg to <type>*
6395 /// <destroy>(<type>* %0)
6396 /// ret void
6397 /// }
6398 /// \endcode
6399 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
6400                                            SourceLocation Loc,
6401                                            ReductionCodeGen &RCG, unsigned N) {
6402   if (!RCG.needCleanups(N))
6403     return nullptr;
6404   ASTContext &C = CGM.getContext();
6405   FunctionArgList Args;
6406   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6407                           ImplicitParamDecl::Other);
6408   Args.emplace_back(&Param);
6409   const auto &FnInfo =
6410       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6411   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6412   std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
6413   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6414                                     Name, &CGM.getModule());
6415   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6416   Fn->setDoesNotRecurse();
6417   CodeGenFunction CGF(CGM);
6418   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6419   Address PrivateAddr = CGF.EmitLoadOfPointer(
6420       CGF.GetAddrOfLocalVar(&Param),
6421       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6422   llvm::Value *Size = nullptr;
6423   // If the size of the reduction item is non-constant, load it from global
6424   // threadprivate variable.
6425   if (RCG.getSizes(N).second) {
6426     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6427         CGF, CGM.getContext().getSizeType(),
6428         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6429     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6430                                 CGM.getContext().getSizeType(), Loc);
6431   }
6432   RCG.emitAggregateType(CGF, N, Size);
6433   // Emit the finalizer body:
6434   // <destroy>(<type>* %0)
6435   RCG.emitCleanups(CGF, N, PrivateAddr);
6436   CGF.FinishFunction(Loc);
6437   return Fn;
6438 }
6439 
/// Emits the descriptor array for the task reduction items in \p Data and
/// the call to __kmpc_task_reduction_init() that registers it.
///
/// For every reduction item one kmp_task_red_input_t element is filled with
/// the address of the shared item, its size, the init/fini/comb helper
/// functions emitted for it, and a flags word.
///
/// \param CGF Function in which the code is emitted.
/// \param Loc Source location used for the emitted helpers and runtime call.
/// \param LHSExprs Left-hand operands of the combiners, indexed like
/// Data.ReductionVars.
/// \param RHSExprs Right-hand operands of the combiners, indexed like
/// Data.ReductionVars.
/// \param Data Task data providing ReductionVars, ReductionCopies and
/// ReductionOps.
/// \returns the result of the __kmpc_task_reduction_init() call, or nullptr
/// when there is no insert point or no reduction item.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_task_red_input {
  //   void *reduce_shar; // shared reduction item
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_task_red_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t");
  RD->startDefinition();
  // The fields are added in the order listed in the struct above.
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
                       Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    // The shared lvalue is emitted here and read back just below.
    RCG.emitSharedLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_size = <size of the item in chars>;
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs, array sections and
    // custom reduction initializations. It is required because runtime does not
    // provide the way to pass the sizes of VLAs/array sections to
    // initializer/combiner/finalizer functions and does not pass the pointer to
    // original reduction item to the initializer. Instead threadprivate global
    // variables are used to store these values and use them in the functions.
    // A non-null SizeVal is what marks the item for delayed creation here.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // A custom reduction initializer also forces delayed creation.
    DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
    // ElemLVal.reduce_fini = fini;
    // (a null pointer is stored when no finalizer function is emitted)
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = DelayedCreation ? 1 : 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
}
6545 
6546 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6547                                               SourceLocation Loc,
6548                                               ReductionCodeGen &RCG,
6549                                               unsigned N) {
6550   auto Sizes = RCG.getSizes(N);
6551   // Emit threadprivate global variable if the type is non-constant
6552   // (Sizes.second = nullptr).
6553   if (Sizes.second) {
6554     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6555                                                      /*isSigned=*/false);
6556     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6557         CGF, CGM.getContext().getSizeType(),
6558         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6559     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6560   }
6561   // Store address of the original reduction item if custom initializer is used.
6562   if (RCG.usesReductionInitializer(N)) {
6563     Address SharedAddr = getAddrOfArtificialThreadPrivate(
6564         CGF, CGM.getContext().VoidPtrTy,
6565         generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
6566     CGF.Builder.CreateStore(
6567         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6568             RCG.getSharedLValue(N).getPointer(CGF), CGM.VoidPtrTy),
6569         SharedAddr, /*IsVolatile=*/false);
6570   }
6571 }
6572 
6573 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6574                                               SourceLocation Loc,
6575                                               llvm::Value *ReductionsPtr,
6576                                               LValue SharedLVal) {
6577   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6578   // *d);
6579   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6580                                                    CGM.IntTy,
6581                                                    /*isSigned=*/true),
6582                          ReductionsPtr,
6583                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6584                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6585   return Address(
6586       CGF.EmitRuntimeCall(
6587           createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
6588       SharedLVal.getAlignment());
6589 }
6590 
6591 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6592                                        SourceLocation Loc) {
6593   if (!CGF.HaveInsertPoint())
6594     return;
6595 
6596   llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
6597   if (OMPBuilder) {
6598     OMPBuilder->CreateTaskwait(CGF.Builder);
6599   } else {
6600     // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6601     // global_tid);
6602     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6603     // Ignore return result until untied tasks are supported.
6604     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
6605   }
6606 
6607   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6608     Region->emitUntiedSwitch(CGF);
6609 }
6610 
6611 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6612                                            OpenMPDirectiveKind InnerKind,
6613                                            const RegionCodeGenTy &CodeGen,
6614                                            bool HasCancel) {
6615   if (!CGF.HaveInsertPoint())
6616     return;
6617   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
6618   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6619 }
6620 
namespace {
/// Cancellation kinds. getCancellationKind() maps an OpenMP directive kind to
/// one of these values, and the value is then emitted as the 32-bit
/// 'cncl_kind' argument of the __kmpc_cancel / __kmpc_cancellationpoint
/// runtime calls.
enum RTCancelKind {
  /// Initial value used by getCancellationKind() before a kind is selected.
  CancelNoreq = 0,
  /// Selected for OMPD_parallel.
  CancelParallel = 1,
  /// Selected for OMPD_for.
  CancelLoop = 2,
  /// Selected for OMPD_sections.
  CancelSections = 3,
  /// Selected for OMPD_taskgroup.
  CancelTaskgroup = 4
};
} // anonymous namespace
6630 
6631 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6632   RTCancelKind CancelKind = CancelNoreq;
6633   if (CancelRegion == OMPD_parallel)
6634     CancelKind = CancelParallel;
6635   else if (CancelRegion == OMPD_for)
6636     CancelKind = CancelLoop;
6637   else if (CancelRegion == OMPD_sections)
6638     CancelKind = CancelSections;
6639   else {
6640     assert(CancelRegion == OMPD_taskgroup);
6641     CancelKind = CancelTaskgroup;
6642   }
6643   return CancelKind;
6644 }
6645 
6646 void CGOpenMPRuntime::emitCancellationPointCall(
6647     CodeGenFunction &CGF, SourceLocation Loc,
6648     OpenMPDirectiveKind CancelRegion) {
6649   if (!CGF.HaveInsertPoint())
6650     return;
6651   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6652   // global_tid, kmp_int32 cncl_kind);
6653   if (auto *OMPRegionInfo =
6654           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6655     // For 'cancellation point taskgroup', the task region info may not have a
6656     // cancel. This may instead happen in another adjacent task.
6657     if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6658       llvm::Value *Args[] = {
6659           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6660           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6661       // Ignore return result until untied tasks are supported.
6662       llvm::Value *Result = CGF.EmitRuntimeCall(
6663           createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
6664       // if (__kmpc_cancellationpoint()) {
6665       //   exit from construct;
6666       // }
6667       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6668       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6669       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6670       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6671       CGF.EmitBlock(ExitBB);
6672       // exit from construct;
6673       CodeGenFunction::JumpDest CancelDest =
6674           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6675       CGF.EmitBranchThroughCleanup(CancelDest);
6676       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6677     }
6678   }
6679 }
6680 
6681 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6682                                      const Expr *IfCond,
6683                                      OpenMPDirectiveKind CancelRegion) {
6684   if (!CGF.HaveInsertPoint())
6685     return;
6686   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6687   // kmp_int32 cncl_kind);
6688   if (auto *OMPRegionInfo =
6689           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6690     auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
6691                                                         PrePostActionTy &) {
6692       CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6693       llvm::Value *Args[] = {
6694           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6695           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6696       // Ignore return result until untied tasks are supported.
6697       llvm::Value *Result = CGF.EmitRuntimeCall(
6698           RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
6699       // if (__kmpc_cancel()) {
6700       //   exit from construct;
6701       // }
6702       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6703       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6704       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6705       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6706       CGF.EmitBlock(ExitBB);
6707       // exit from construct;
6708       CodeGenFunction::JumpDest CancelDest =
6709           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6710       CGF.EmitBranchThroughCleanup(CancelDest);
6711       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6712     };
6713     if (IfCond) {
6714       emitIfClause(CGF, IfCond, ThenGen,
6715                    [](CodeGenFunction &, PrePostActionTy &) {});
6716     } else {
6717       RegionCodeGenTy ThenRCG(ThenGen);
6718       ThenRCG(CGF);
6719     }
6720   }
6721 }
6722 
/// Emits the outlined function for the target directive \p D. Records that a
/// target region has been emitted and forwards all arguments unchanged to
/// emitTargetOutlinedFunctionHelper().
///
/// \param ParentName Name of the enclosing function; must not be empty.
/// \param OutlinedFn   [out] Set by the helper.
/// \param OutlinedFnID [out] Set by the helper.
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  // Set before delegating, so the flag is already true while the helper runs.
  HasEmittedTargetRegion = true;
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}
6732 
6733 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6734     const OMPExecutableDirective &D, StringRef ParentName,
6735     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6736     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6737   // Create a unique name for the entry function using the source location
6738   // information of the current target region. The name will be something like:
6739   //
6740   // __omp_offloading_DD_FFFF_PP_lBB
6741   //
6742   // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
6743   // mangled name of the function that encloses the target region and BB is the
6744   // line number of the target region.
6745 
6746   unsigned DeviceID;
6747   unsigned FileID;
6748   unsigned Line;
6749   getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
6750                            Line);
6751   SmallString<64> EntryFnName;
6752   {
6753     llvm::raw_svector_ostream OS(EntryFnName);
6754     OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
6755        << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
6756   }
6757 
6758   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6759 
6760   CodeGenFunction CGF(CGM, true);
6761   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6762   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6763 
6764   OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
6765 
6766   // If this target outline function is not an offload entry, we don't need to
6767   // register it.
6768   if (!IsOffloadEntry)
6769     return;
6770 
6771   // The target region ID is used by the runtime library to identify the current
6772   // target region, so it only has to be unique and not necessarily point to
6773   // anything. It could be the pointer to the outlined function that implements
6774   // the target region, but we aren't using that so that the compiler doesn't
6775   // need to keep that, and could therefore inline the host function if proven
6776   // worthwhile during optimization. In the other hand, if emitting code for the
6777   // device, the ID has to be the function address so that it can retrieved from
6778   // the offloading entry and launched by the runtime library. We also mark the
6779   // outlined function to have external linkage in case we are emitting code for
6780   // the device, because these functions will be entry points to the device.
6781 
6782   if (CGM.getLangOpts().OpenMPIsDevice) {
6783     OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
6784     OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
6785     OutlinedFn->setDSOLocal(false);
6786   } else {
6787     std::string Name = getName({EntryFnName, "region_id"});
6788     OutlinedFnID = new llvm::GlobalVariable(
6789         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
6790         llvm::GlobalValue::WeakAnyLinkage,
6791         llvm::Constant::getNullValue(CGM.Int8Ty), Name);
6792   }
6793 
6794   // Register the information for the entry associated with this target region.
6795   OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
6796       DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
6797       OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
6798 }
6799 
6800 /// Checks if the expression is constant or does not have non-trivial function
6801 /// calls.
6802 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6803   // We can skip constant expressions.
6804   // We can skip expressions with trivial calls or simple expressions.
6805   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6806           !E->hasNonTrivialCall(Ctx)) &&
6807          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6808 }
6809 
6810 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6811                                                     const Stmt *Body) {
6812   const Stmt *Child = Body->IgnoreContainers();
6813   while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6814     Child = nullptr;
6815     for (const Stmt *S : C->body()) {
6816       if (const auto *E = dyn_cast<Expr>(S)) {
6817         if (isTrivial(Ctx, E))
6818           continue;
6819       }
6820       // Some of the statements can be ignored.
6821       if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6822           isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6823         continue;
6824       // Analyze declarations.
6825       if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6826         if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
6827               if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6828                   isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6829                   isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6830                   isa<UsingDirectiveDecl>(D) ||
6831                   isa<OMPDeclareReductionDecl>(D) ||
6832                   isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6833                 return true;
6834               const auto *VD = dyn_cast<VarDecl>(D);
6835               if (!VD)
6836                 return false;
6837               return VD->isConstexpr() ||
6838                      ((VD->getType().isTrivialType(Ctx) ||
6839                        VD->getType()->isReferenceType()) &&
6840                       (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
6841             }))
6842           continue;
6843       }
6844       // Found multiple children - cannot get the one child only.
6845       if (Child)
6846         return nullptr;
6847       Child = S;
6848     }
6849     if (Child)
6850       Child = Child->IgnoreContainers();
6851   }
6852   return Child;
6853 }
6854 
6855 /// Emit the number of teams for a target directive.  Inspect the num_teams
6856 /// clause associated with a teams construct combined or closely nested
6857 /// with the target directive.
6858 ///
6859 /// Emit a team of size one for directives such as 'target parallel' that
6860 /// have no associated teams construct.
6861 ///
6862 /// Otherwise, return nullptr.
6863 static llvm::Value *
6864 emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
6865                                const OMPExecutableDirective &D) {
6866   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6867          "Clauses associated with the teams directive expected to be emitted "
6868          "only for the host!");
6869   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6870   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6871          "Expected target-based executable directive.");
6872   CGBuilderTy &Bld = CGF.Builder;
6873   switch (DirectiveKind) {
6874   case OMPD_target: {
6875     const auto *CS = D.getInnermostCapturedStmt();
6876     const auto *Body =
6877         CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6878     const Stmt *ChildStmt =
6879         CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6880     if (const auto *NestedDir =
6881             dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6882       if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6883         if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6884           CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6885           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6886           const Expr *NumTeams =
6887               NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6888           llvm::Value *NumTeamsVal =
6889               CGF.EmitScalarExpr(NumTeams,
6890                                  /*IgnoreResultAssign*/ true);
6891           return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6892                                    /*isSigned=*/true);
6893         }
6894         return Bld.getInt32(0);
6895       }
6896       if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6897           isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
6898         return Bld.getInt32(1);
6899       return Bld.getInt32(0);
6900     }
6901     return nullptr;
6902   }
6903   case OMPD_target_teams:
6904   case OMPD_target_teams_distribute:
6905   case OMPD_target_teams_distribute_simd:
6906   case OMPD_target_teams_distribute_parallel_for:
6907   case OMPD_target_teams_distribute_parallel_for_simd: {
6908     if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6909       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6910       const Expr *NumTeams =
6911           D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6912       llvm::Value *NumTeamsVal =
6913           CGF.EmitScalarExpr(NumTeams,
6914                              /*IgnoreResultAssign*/ true);
6915       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6916                                /*isSigned=*/true);
6917     }
6918     return Bld.getInt32(0);
6919   }
6920   case OMPD_target_parallel:
6921   case OMPD_target_parallel_for:
6922   case OMPD_target_parallel_for_simd:
6923   case OMPD_target_simd:
6924     return Bld.getInt32(1);
6925   case OMPD_parallel:
6926   case OMPD_for:
6927   case OMPD_parallel_for:
6928   case OMPD_parallel_master:
6929   case OMPD_parallel_sections:
6930   case OMPD_for_simd:
6931   case OMPD_parallel_for_simd:
6932   case OMPD_cancel:
6933   case OMPD_cancellation_point:
6934   case OMPD_ordered:
6935   case OMPD_threadprivate:
6936   case OMPD_allocate:
6937   case OMPD_task:
6938   case OMPD_simd:
6939   case OMPD_sections:
6940   case OMPD_section:
6941   case OMPD_single:
6942   case OMPD_master:
6943   case OMPD_critical:
6944   case OMPD_taskyield:
6945   case OMPD_barrier:
6946   case OMPD_taskwait:
6947   case OMPD_taskgroup:
6948   case OMPD_atomic:
6949   case OMPD_flush:
6950   case OMPD_depobj:
6951   case OMPD_teams:
6952   case OMPD_target_data:
6953   case OMPD_target_exit_data:
6954   case OMPD_target_enter_data:
6955   case OMPD_distribute:
6956   case OMPD_distribute_simd:
6957   case OMPD_distribute_parallel_for:
6958   case OMPD_distribute_parallel_for_simd:
6959   case OMPD_teams_distribute:
6960   case OMPD_teams_distribute_simd:
6961   case OMPD_teams_distribute_parallel_for:
6962   case OMPD_teams_distribute_parallel_for_simd:
6963   case OMPD_target_update:
6964   case OMPD_declare_simd:
6965   case OMPD_declare_variant:
6966   case OMPD_declare_target:
6967   case OMPD_end_declare_target:
6968   case OMPD_declare_reduction:
6969   case OMPD_declare_mapper:
6970   case OMPD_taskloop:
6971   case OMPD_taskloop_simd:
6972   case OMPD_master_taskloop:
6973   case OMPD_master_taskloop_simd:
6974   case OMPD_parallel_master_taskloop:
6975   case OMPD_parallel_master_taskloop_simd:
6976   case OMPD_requires:
6977   case OMPD_unknown:
6978     break;
6979   }
6980   llvm_unreachable("Unexpected directive kind.");
6981 }
6982 
6983 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6984                                   llvm::Value *DefaultThreadLimitVal) {
6985   const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6986       CGF.getContext(), CS->getCapturedStmt());
6987   if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6988     if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6989       llvm::Value *NumThreads = nullptr;
6990       llvm::Value *CondVal = nullptr;
6991       // Handle if clause. If if clause present, the number of threads is
6992       // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6993       if (Dir->hasClausesOfKind<OMPIfClause>()) {
6994         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6995         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6996         const OMPIfClause *IfClause = nullptr;
6997         for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6998           if (C->getNameModifier() == OMPD_unknown ||
6999               C->getNameModifier() == OMPD_parallel) {
7000             IfClause = C;
7001             break;
7002           }
7003         }
7004         if (IfClause) {
7005           const Expr *Cond = IfClause->getCondition();
7006           bool Result;
7007           if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
7008             if (!Result)
7009               return CGF.Builder.getInt32(1);
7010           } else {
7011             CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
7012             if (const auto *PreInit =
7013                     cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
7014               for (const auto *I : PreInit->decls()) {
7015                 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
7016                   CGF.EmitVarDecl(cast<VarDecl>(*I));
7017                 } else {
7018                   CodeGenFunction::AutoVarEmission Emission =
7019                       CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
7020                   CGF.EmitAutoVarCleanups(Emission);
7021                 }
7022               }
7023             }
7024             CondVal = CGF.EvaluateExprAsBool(Cond);
7025           }
7026         }
7027       }
7028       // Check the value of num_threads clause iff if clause was not specified
7029       // or is not evaluated to false.
7030       if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
7031         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
7032         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
7033         const auto *NumThreadsClause =
7034             Dir->getSingleClause<OMPNumThreadsClause>();
7035         CodeGenFunction::LexicalScope Scope(
7036             CGF, NumThreadsClause->getNumThreads()->getSourceRange());
7037         if (const auto *PreInit =
7038                 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
7039           for (const auto *I : PreInit->decls()) {
7040             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
7041               CGF.EmitVarDecl(cast<VarDecl>(*I));
7042             } else {
7043               CodeGenFunction::AutoVarEmission Emission =
7044                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
7045               CGF.EmitAutoVarCleanups(Emission);
7046             }
7047           }
7048         }
7049         NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
7050         NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
7051                                                /*isSigned=*/false);
7052         if (DefaultThreadLimitVal)
7053           NumThreads = CGF.Builder.CreateSelect(
7054               CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
7055               DefaultThreadLimitVal, NumThreads);
7056       } else {
7057         NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
7058                                            : CGF.Builder.getInt32(0);
7059       }
7060       // Process condition of the if clause.
7061       if (CondVal) {
7062         NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
7063                                               CGF.Builder.getInt32(1));
7064       }
7065       return NumThreads;
7066     }
7067     if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
7068       return CGF.Builder.getInt32(1);
7069     return DefaultThreadLimitVal;
7070   }
7071   return DefaultThreadLimitVal ? DefaultThreadLimitVal
7072                                : CGF.Builder.getInt32(0);
7073 }
7074 
7075 /// Emit the number of threads for a target directive.  Inspect the
7076 /// thread_limit clause associated with a teams construct combined or closely
7077 /// nested with the target directive.
7078 ///
7079 /// Emit the num_threads clause for directives such as 'target parallel' that
7080 /// have no associated teams construct.
7081 ///
7082 /// Otherwise, return nullptr.
7083 static llvm::Value *
7084 emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
7085                                  const OMPExecutableDirective &D) {
7086   assert(!CGF.getLangOpts().OpenMPIsDevice &&
7087          "Clauses associated with the teams directive expected to be emitted "
7088          "only for the host!");
7089   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
7090   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
7091          "Expected target-based executable directive.");
7092   CGBuilderTy &Bld = CGF.Builder;
7093   llvm::Value *ThreadLimitVal = nullptr;
7094   llvm::Value *NumThreadsVal = nullptr;
7095   switch (DirectiveKind) {
7096   case OMPD_target: {
7097     const CapturedStmt *CS = D.getInnermostCapturedStmt();
7098     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7099       return NumThreads;
7100     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
7101         CGF.getContext(), CS->getCapturedStmt());
7102     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
7103       if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
7104         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
7105         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
7106         const auto *ThreadLimitClause =
7107             Dir->getSingleClause<OMPThreadLimitClause>();
7108         CodeGenFunction::LexicalScope Scope(
7109             CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
7110         if (const auto *PreInit =
7111                 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
7112           for (const auto *I : PreInit->decls()) {
7113             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
7114               CGF.EmitVarDecl(cast<VarDecl>(*I));
7115             } else {
7116               CodeGenFunction::AutoVarEmission Emission =
7117                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
7118               CGF.EmitAutoVarCleanups(Emission);
7119             }
7120           }
7121         }
7122         llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7123             ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7124         ThreadLimitVal =
7125             Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7126       }
7127       if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
7128           !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
7129         CS = Dir->getInnermostCapturedStmt();
7130         const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
7131             CGF.getContext(), CS->getCapturedStmt());
7132         Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
7133       }
7134       if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
7135           !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
7136         CS = Dir->getInnermostCapturedStmt();
7137         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7138           return NumThreads;
7139       }
7140       if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
7141         return Bld.getInt32(1);
7142     }
7143     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
7144   }
7145   case OMPD_target_teams: {
7146     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7147       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7148       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7149       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7150           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7151       ThreadLimitVal =
7152           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7153     }
7154     const CapturedStmt *CS = D.getInnermostCapturedStmt();
7155     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7156       return NumThreads;
7157     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
7158         CGF.getContext(), CS->getCapturedStmt());
7159     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
7160       if (Dir->getDirectiveKind() == OMPD_distribute) {
7161         CS = Dir->getInnermostCapturedStmt();
7162         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7163           return NumThreads;
7164       }
7165     }
7166     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
7167   }
7168   case OMPD_target_teams_distribute:
7169     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7170       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7171       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7172       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7173           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7174       ThreadLimitVal =
7175           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7176     }
7177     return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
7178   case OMPD_target_parallel:
7179   case OMPD_target_parallel_for:
7180   case OMPD_target_parallel_for_simd:
7181   case OMPD_target_teams_distribute_parallel_for:
7182   case OMPD_target_teams_distribute_parallel_for_simd: {
7183     llvm::Value *CondVal = nullptr;
7184     // Handle if clause. If if clause present, the number of threads is
7185     // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
7186     if (D.hasClausesOfKind<OMPIfClause>()) {
7187       const OMPIfClause *IfClause = nullptr;
7188       for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
7189         if (C->getNameModifier() == OMPD_unknown ||
7190             C->getNameModifier() == OMPD_parallel) {
7191           IfClause = C;
7192           break;
7193         }
7194       }
7195       if (IfClause) {
7196         const Expr *Cond = IfClause->getCondition();
7197         bool Result;
7198         if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
7199           if (!Result)
7200             return Bld.getInt32(1);
7201         } else {
7202           CodeGenFunction::RunCleanupsScope Scope(CGF);
7203           CondVal = CGF.EvaluateExprAsBool(Cond);
7204         }
7205       }
7206     }
7207     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7208       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7209       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7210       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7211           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7212       ThreadLimitVal =
7213           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7214     }
7215     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
7216       CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
7217       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
7218       llvm::Value *NumThreads = CGF.EmitScalarExpr(
7219           NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
7220       NumThreadsVal =
7221           Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
7222       ThreadLimitVal = ThreadLimitVal
7223                            ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
7224                                                                 ThreadLimitVal),
7225                                               NumThreadsVal, ThreadLimitVal)
7226                            : NumThreadsVal;
7227     }
7228     if (!ThreadLimitVal)
7229       ThreadLimitVal = Bld.getInt32(0);
7230     if (CondVal)
7231       return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
7232     return ThreadLimitVal;
7233   }
7234   case OMPD_target_teams_distribute_simd:
7235   case OMPD_target_simd:
7236     return Bld.getInt32(1);
7237   case OMPD_parallel:
7238   case OMPD_for:
7239   case OMPD_parallel_for:
7240   case OMPD_parallel_master:
7241   case OMPD_parallel_sections:
7242   case OMPD_for_simd:
7243   case OMPD_parallel_for_simd:
7244   case OMPD_cancel:
7245   case OMPD_cancellation_point:
7246   case OMPD_ordered:
7247   case OMPD_threadprivate:
7248   case OMPD_allocate:
7249   case OMPD_task:
7250   case OMPD_simd:
7251   case OMPD_sections:
7252   case OMPD_section:
7253   case OMPD_single:
7254   case OMPD_master:
7255   case OMPD_critical:
7256   case OMPD_taskyield:
7257   case OMPD_barrier:
7258   case OMPD_taskwait:
7259   case OMPD_taskgroup:
7260   case OMPD_atomic:
7261   case OMPD_flush:
7262   case OMPD_depobj:
7263   case OMPD_teams:
7264   case OMPD_target_data:
7265   case OMPD_target_exit_data:
7266   case OMPD_target_enter_data:
7267   case OMPD_distribute:
7268   case OMPD_distribute_simd:
7269   case OMPD_distribute_parallel_for:
7270   case OMPD_distribute_parallel_for_simd:
7271   case OMPD_teams_distribute:
7272   case OMPD_teams_distribute_simd:
7273   case OMPD_teams_distribute_parallel_for:
7274   case OMPD_teams_distribute_parallel_for_simd:
7275   case OMPD_target_update:
7276   case OMPD_declare_simd:
7277   case OMPD_declare_variant:
7278   case OMPD_declare_target:
7279   case OMPD_end_declare_target:
7280   case OMPD_declare_reduction:
7281   case OMPD_declare_mapper:
7282   case OMPD_taskloop:
7283   case OMPD_taskloop_simd:
7284   case OMPD_master_taskloop:
7285   case OMPD_master_taskloop_simd:
7286   case OMPD_parallel_master_taskloop:
7287   case OMPD_parallel_master_taskloop_simd:
7288   case OMPD_requires:
7289   case OMPD_unknown:
7290     break;
7291   }
7292   llvm_unreachable("Unsupported directive kind.");
7293 }
7294 
7295 namespace {
7296 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7297 
7298 // Utility to handle information from clauses associated with a given
7299 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7300 // It provides a convenient interface to obtain the information and generate
7301 // code for that information.
7302 class MappableExprsHandler {
7303 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading. The enumerators are individual bits (plus the MEMBER_OF
  /// field) that are combined with bitwise operators; those operators are
  /// made available for this enum by LLVM_MARK_AS_BITMASK_ENUM below.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class. The whole mask (all 16 bits set) is also OR-ed in as a
    /// placeholder by generateInfoForComponentList, to be replaced later with
    /// the actual MEMBER_OF value.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    // Must name the largest enumerator so the bitmask operators know which
    // bits are valid.
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7344 
7345   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7346   static unsigned getFlagMemberOffset() {
7347     unsigned Offset = 0;
7348     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7349          Remain = Remain >> 1)
7350       Offset++;
7351     return Offset;
7352   }
7353 
7354   /// Class that associates information with a base pointer to be passed to the
7355   /// runtime library.
7356   class BasePointerInfo {
7357     /// The base pointer.
7358     llvm::Value *Ptr = nullptr;
7359     /// The base declaration that refers to this device pointer, or null if
7360     /// there is none.
7361     const ValueDecl *DevPtrDecl = nullptr;
7362 
7363   public:
7364     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7365         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7366     llvm::Value *operator*() const { return Ptr; }
7367     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7368     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7369   };
7370 
  /// Array of base pointers, each optionally tagged with a device pointer
  /// declaration (see BasePointerInfo).
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  /// Array of plain IR values; generateInfoForComponentList uses it both for
  /// the section pointers and for the sizes.
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  /// Array of map type flag words, one per generated entry.
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
7374 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Lowest mapped element: its field index and its address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Highest mapped element: its field index and its address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Base address of the struct. Starts out invalid; users test
    /// Base.isValid() to tell whether any element has been recorded yet.
    Address Base = Address::invalid();
  };
7386 
7387 private:
7388   /// Kind that defines how a device pointer has to be returned.
7389   struct MapInfo {
7390     OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
7391     OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
7392     ArrayRef<OpenMPMapModifierKind> MapModifiers;
7393     bool ReturnDevicePointer = false;
7394     bool IsImplicit = false;
7395 
7396     MapInfo() = default;
7397     MapInfo(
7398         OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7399         OpenMPMapClauseKind MapType,
7400         ArrayRef<OpenMPMapModifierKind> MapModifiers,
7401         bool ReturnDevicePointer, bool IsImplicit)
7402         : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
7403           ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
7404   };
7405 
7406   /// If use_device_ptr is used on a pointer which is a struct member and there
7407   /// is no map information about it, then emission of that entry is deferred
7408   /// until the whole struct has been processed.
7409   struct DeferredDevicePtrEntryTy {
7410     const Expr *IE = nullptr;
7411     const ValueDecl *VD = nullptr;
7412 
7413     DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
7414         : IE(IE), VD(VD) {}
7415   };
7416 
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for. All IR emitted by this
  /// handler goes through it.
  CodeGenFunction &CGF;

  /// Set of all firstprivate variables in the current directive.
  /// The bool value is true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;
7437 
7438   llvm::Value *getExprTypeSize(const Expr *E) const {
7439     QualType ExprTy = E->getType().getCanonicalType();
7440 
7441     // Reference types are ignored for mapping purposes.
7442     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7443       ExprTy = RefTy->getPointeeType().getCanonicalType();
7444 
7445     // Given that an array section is considered a built-in type, we need to
7446     // do the calculation based on the length of the section instead of relying
7447     // on CGF.getTypeSize(E->getType()).
7448     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7449       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7450                             OAE->getBase()->IgnoreParenImpCasts())
7451                             .getCanonicalType();
7452 
7453       // If there is no length associated with the expression and lower bound is
7454       // not specified too, that means we are using the whole length of the
7455       // base.
7456       if (!OAE->getLength() && OAE->getColonLoc().isValid() &&
7457           !OAE->getLowerBound())
7458         return CGF.getTypeSize(BaseTy);
7459 
7460       llvm::Value *ElemSize;
7461       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7462         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7463       } else {
7464         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7465         assert(ATy && "Expecting array type if not a pointer type.");
7466         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7467       }
7468 
7469       // If we don't have a length at this point, that is because we have an
7470       // array section with a single element.
7471       if (!OAE->getLength() && OAE->getColonLoc().isInvalid())
7472         return ElemSize;
7473 
7474       if (const Expr *LenExpr = OAE->getLength()) {
7475         llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7476         LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7477                                              CGF.getContext().getSizeType(),
7478                                              LenExpr->getExprLoc());
7479         return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7480       }
7481       assert(!OAE->getLength() && OAE->getColonLoc().isValid() &&
7482              OAE->getLowerBound() && "expected array_section[lb:].");
7483       // Size = sizetype - lb * elemtype;
7484       llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7485       llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7486       LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7487                                        CGF.getContext().getSizeType(),
7488                                        OAE->getLowerBound()->getExprLoc());
7489       LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7490       llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7491       llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7492       LengthVal = CGF.Builder.CreateSelect(
7493           Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7494       return LengthVal;
7495     }
7496     return CGF.getTypeSize(ExprTy);
7497   }
7498 
7499   /// Return the corresponding bits for a given map clause modifier. Add
7500   /// a flag marking the map as a pointer if requested. Add a flag marking the
7501   /// map as the first one of a series of maps that relate to the same map
7502   /// expression.
7503   OpenMPOffloadMappingFlags getMapTypeBits(
7504       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7505       bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
7506     OpenMPOffloadMappingFlags Bits =
7507         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7508     switch (MapType) {
7509     case OMPC_MAP_alloc:
7510     case OMPC_MAP_release:
7511       // alloc and release is the default behavior in the runtime library,  i.e.
7512       // if we don't pass any bits alloc/release that is what the runtime is
7513       // going to do. Therefore, we don't need to signal anything for these two
7514       // type modifiers.
7515       break;
7516     case OMPC_MAP_to:
7517       Bits |= OMP_MAP_TO;
7518       break;
7519     case OMPC_MAP_from:
7520       Bits |= OMP_MAP_FROM;
7521       break;
7522     case OMPC_MAP_tofrom:
7523       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7524       break;
7525     case OMPC_MAP_delete:
7526       Bits |= OMP_MAP_DELETE;
7527       break;
7528     case OMPC_MAP_unknown:
7529       llvm_unreachable("Unexpected map type!");
7530     }
7531     if (AddPtrFlag)
7532       Bits |= OMP_MAP_PTR_AND_OBJ;
7533     if (AddIsTargetParamFlag)
7534       Bits |= OMP_MAP_TARGET_PARAM;
7535     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7536         != MapModifiers.end())
7537       Bits |= OMP_MAP_ALWAYS;
7538     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7539         != MapModifiers.end())
7540       Bits |= OMP_MAP_CLOSE;
7541     return Bits;
7542   }
7543 
7544   /// Return true if the provided expression is a final array section. A
7545   /// final array section, is one whose length can't be proved to be one.
7546   bool isFinalArraySectionExpression(const Expr *E) const {
7547     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7548 
7549     // It is not an array section and therefore not a unity-size one.
7550     if (!OASE)
7551       return false;
7552 
7553     // An array section with no colon always refer to a single element.
7554     if (OASE->getColonLoc().isInvalid())
7555       return false;
7556 
7557     const Expr *Length = OASE->getLength();
7558 
7559     // If we don't have a length we have to check if the array has size 1
7560     // for this dimension. Also, we should always expect a length if the
7561     // base type is pointer.
7562     if (!Length) {
7563       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7564                              OASE->getBase()->IgnoreParenImpCasts())
7565                              .getCanonicalType();
7566       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7567         return ATy->getSize().getSExtValue() != 1;
7568       // If we don't have a constant dimension length, we have to consider
7569       // the current section as having any size, so it is not necessarily
7570       // unitary. If it happen to be unity size, that's user fault.
7571       return true;
7572     }
7573 
7574     // Check if the length evaluates to 1.
7575     Expr::EvalResult Result;
7576     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7577       return true; // Can have more that size 1.
7578 
7579     llvm::APSInt ConstLength = Result.Val.getInt();
7580     return ConstLength.getSExtValue() != 1;
7581   }
7582 
7583   /// Generate the base pointers, section pointers, sizes and map type
7584   /// bits for the provided map type, map modifier, and expression components.
7585   /// \a IsFirstComponent should be set to true if the provided set of
7586   /// components is the first associated with a capture.
7587   void generateInfoForComponentList(
7588       OpenMPMapClauseKind MapType,
7589       ArrayRef<OpenMPMapModifierKind> MapModifiers,
7590       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7591       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
7592       MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
7593       StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
7594       bool IsImplicit,
7595       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7596           OverlappedElements = llvm::None) const {
7597     // The following summarizes what has to be generated for each map and the
7598     // types below. The generated information is expressed in this order:
7599     // base pointer, section pointer, size, flags
7600     // (to add to the ones that come from the map type and modifier).
7601     //
7602     // double d;
7603     // int i[100];
7604     // float *p;
7605     //
7606     // struct S1 {
7607     //   int i;
7608     //   float f[50];
7609     // }
7610     // struct S2 {
7611     //   int i;
7612     //   float f[50];
7613     //   S1 s;
7614     //   double *p;
7615     //   struct S2 *ps;
7616     // }
7617     // S2 s;
7618     // S2 *ps;
7619     //
7620     // map(d)
7621     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7622     //
7623     // map(i)
7624     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7625     //
7626     // map(i[1:23])
7627     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7628     //
7629     // map(p)
7630     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7631     //
7632     // map(p[1:24])
7633     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7634     //
7635     // map(s)
7636     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7637     //
7638     // map(s.i)
7639     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7640     //
7641     // map(s.s.f)
7642     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7643     //
7644     // map(s.p)
7645     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7646     //
7647     // map(to: s.p[:22])
7648     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7649     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7650     // &(s.p), &(s.p[0]), 22*sizeof(double),
7651     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7652     // (*) alloc space for struct members, only this is a target parameter
7653     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7654     //      optimizes this entry out, same in the examples below)
7655     // (***) map the pointee (map: to)
7656     //
7657     // map(s.ps)
7658     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7659     //
7660     // map(from: s.ps->s.i)
7661     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7662     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7663     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7664     //
7665     // map(to: s.ps->ps)
7666     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7667     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7668     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7669     //
7670     // map(s.ps->ps->ps)
7671     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7672     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7673     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7674     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7675     //
7676     // map(to: s.ps->ps->s.f[:22])
7677     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7678     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7679     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7680     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7681     //
7682     // map(ps)
7683     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7684     //
7685     // map(ps->i)
7686     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7687     //
7688     // map(ps->s.f)
7689     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7690     //
7691     // map(from: ps->p)
7692     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7693     //
7694     // map(to: ps->p[:22])
7695     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7696     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7697     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7698     //
7699     // map(ps->ps)
7700     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7701     //
7702     // map(from: ps->ps->s.i)
7703     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7704     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7705     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7706     //
7707     // map(from: ps->ps->ps)
7708     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7709     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7710     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7711     //
7712     // map(ps->ps->ps->ps)
7713     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7714     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7715     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7716     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7717     //
7718     // map(to: ps->ps->ps->s.f[:22])
7719     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7720     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7721     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7722     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7723     //
7724     // map(to: s.f[:22]) map(from: s.p[:33])
7725     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (*), TARGET_PARAM
7727     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7728     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7729     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7730     // (*) allocate contiguous space needed to fit all mapped members even if
7731     //     we allocate space for members not mapped (in this example,
7732     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7733     //     them as well because they fall between &s.f[0] and &s.p)
7734     //
7735     // map(from: s.f[:22]) map(to: ps->p[:33])
7736     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7737     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7738     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7739     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7740     // (*) the struct this entry pertains to is the 2nd element in the list of
7741     //     arguments, hence MEMBER_OF(2)
7742     //
7743     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7744     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7745     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7746     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7747     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7748     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7749     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7750     // (*) the struct this entry pertains to is the 4th element in the list
7751     //     of arguments, hence MEMBER_OF(4)
7752 
7753     // Track if the map information being generated is the first for a capture.
7754     bool IsCaptureFirstInfo = IsFirstComponentList;
7755     // When the variable is on a declare target link or in a to clause with
7756     // unified memory, a reference is needed to hold the host/device address
7757     // of the variable.
7758     bool RequiresReference = false;
7759 
7760     // Scan the components from the base to the complete expression.
7761     auto CI = Components.rbegin();
7762     auto CE = Components.rend();
7763     auto I = CI;
7764 
7765     // Track if the map information being generated is the first for a list of
7766     // components.
7767     bool IsExpressionFirstInfo = true;
7768     Address BP = Address::invalid();
7769     const Expr *AssocExpr = I->getAssociatedExpression();
7770     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7771     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7772 
7773     if (isa<MemberExpr>(AssocExpr)) {
7774       // The base is the 'this' pointer. The content of the pointer is going
7775       // to be the base of the field being mapped.
7776       BP = CGF.LoadCXXThisAddress();
7777     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7778                (OASE &&
7779                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7780       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7781     } else {
7782       // The base is the reference to the variable.
7783       // BP = &Var.
7784       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7785       if (const auto *VD =
7786               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7787         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7788                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7789           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7790               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7791                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7792             RequiresReference = true;
7793             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7794           }
7795         }
7796       }
7797 
7798       // If the variable is a pointer and is being dereferenced (i.e. is not
7799       // the last component), the base has to be the pointer itself, not its
7800       // reference. References are ignored for mapping purposes.
7801       QualType Ty =
7802           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7803       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7804         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7805 
7806         // We do not need to generate individual map information for the
7807         // pointer, it can be associated with the combined storage.
7808         ++I;
7809       }
7810     }
7811 
7812     // Track whether a component of the list should be marked as MEMBER_OF some
7813     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7814     // in a component list should be marked as MEMBER_OF, all subsequent entries
7815     // do not belong to the base struct. E.g.
7816     // struct S2 s;
7817     // s.ps->ps->ps->f[:]
7818     //   (1) (2) (3) (4)
7819     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7820     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7821     // is the pointee of ps(2) which is not member of struct s, so it should not
7822     // be marked as such (it is still PTR_AND_OBJ).
7823     // The variable is initialized to false so that PTR_AND_OBJ entries which
7824     // are not struct members are not considered (e.g. array of pointers to
7825     // data).
7826     bool ShouldBeMemberOf = false;
7827 
7828     // Variable keeping track of whether or not we have encountered a component
7829     // in the component list which is a member expression. Useful when we have a
7830     // pointer or a final array section, in which case it is the previous
7831     // component in the list which tells us whether we have a member expression.
7832     // E.g. X.f[:]
7833     // While processing the final array section "[:]" it is "f" which tells us
7834     // whether we are dealing with a member of a declared struct.
7835     const MemberExpr *EncounteredME = nullptr;
7836 
7837     for (; I != CE; ++I) {
7838       // If the current component is member of a struct (parent struct) mark it.
7839       if (!EncounteredME) {
7840         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7841         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7842         // as MEMBER_OF the parent struct.
7843         if (EncounteredME)
7844           ShouldBeMemberOf = true;
7845       }
7846 
7847       auto Next = std::next(I);
7848 
7849       // We need to generate the addresses and sizes if this is the last
7850       // component, if the component is a pointer or if it is an array section
7851       // whose length can't be proved to be one. If this is a pointer, it
7852       // becomes the base address for the following components.
7853 
7854       // A final array section, is one whose length can't be proved to be one.
7855       bool IsFinalArraySection =
7856           isFinalArraySectionExpression(I->getAssociatedExpression());
7857 
7858       // Get information on whether the element is a pointer. Have to do a
7859       // special treatment for array sections given that they are built-in
7860       // types.
7861       const auto *OASE =
7862           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7863       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7864       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7865       bool IsPointer =
7866           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7867                        .getCanonicalType()
7868                        ->isAnyPointerType()) ||
7869           I->getAssociatedExpression()->getType()->isAnyPointerType();
7870       bool IsNonDerefPointer = IsPointer && !UO && !BO;
7871 
7872       if (Next == CE || IsNonDerefPointer || IsFinalArraySection) {
7873         // If this is not the last component, we expect the pointer to be
7874         // associated with an array expression or member expression.
7875         assert((Next == CE ||
7876                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7877                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7878                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7879                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7880                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7881                "Unexpected expression");
7882 
7883         Address LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7884                          .getAddress(CGF);
7885 
7886         // If this component is a pointer inside the base struct then we don't
7887         // need to create any entry for it - it will be combined with the object
7888         // it is pointing to into a single PTR_AND_OBJ entry.
7889         bool IsMemberPointer =
7890             IsPointer && EncounteredME &&
7891             (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
7892              EncounteredME);
7893         if (!OverlappedElements.empty()) {
7894           // Handle base element with the info for overlapped elements.
7895           assert(!PartialStruct.Base.isValid() && "The base element is set.");
7896           assert(Next == CE &&
7897                  "Expected last element for the overlapped elements.");
7898           assert(!IsPointer &&
7899                  "Unexpected base element with the pointer type.");
7900           // Mark the whole struct as the struct that requires allocation on the
7901           // device.
7902           PartialStruct.LowestElem = {0, LB};
7903           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7904               I->getAssociatedExpression()->getType());
7905           Address HB = CGF.Builder.CreateConstGEP(
7906               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
7907                                                               CGF.VoidPtrTy),
7908               TypeSize.getQuantity() - 1);
7909           PartialStruct.HighestElem = {
7910               std::numeric_limits<decltype(
7911                   PartialStruct.HighestElem.first)>::max(),
7912               HB};
7913           PartialStruct.Base = BP;
7914           // Emit data for non-overlapped data.
7915           OpenMPOffloadMappingFlags Flags =
7916               OMP_MAP_MEMBER_OF |
7917               getMapTypeBits(MapType, MapModifiers, IsImplicit,
7918                              /*AddPtrFlag=*/false,
7919                              /*AddIsTargetParamFlag=*/false);
7920           LB = BP;
7921           llvm::Value *Size = nullptr;
7922           // Do bitcopy of all non-overlapped structure elements.
7923           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7924                    Component : OverlappedElements) {
7925             Address ComponentLB = Address::invalid();
7926             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7927                  Component) {
7928               if (MC.getAssociatedDeclaration()) {
7929                 ComponentLB =
7930                     CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7931                         .getAddress(CGF);
7932                 Size = CGF.Builder.CreatePtrDiff(
7933                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
7934                     CGF.EmitCastToVoidPtr(LB.getPointer()));
7935                 break;
7936               }
7937             }
7938             BasePointers.push_back(BP.getPointer());
7939             Pointers.push_back(LB.getPointer());
7940             Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty,
7941                                                       /*isSigned=*/true));
7942             Types.push_back(Flags);
7943             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7944           }
7945           BasePointers.push_back(BP.getPointer());
7946           Pointers.push_back(LB.getPointer());
7947           Size = CGF.Builder.CreatePtrDiff(
7948               CGF.EmitCastToVoidPtr(
7949                   CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
7950               CGF.EmitCastToVoidPtr(LB.getPointer()));
7951           Sizes.push_back(
7952               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7953           Types.push_back(Flags);
7954           break;
7955         }
7956         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7957         if (!IsMemberPointer) {
7958           BasePointers.push_back(BP.getPointer());
7959           Pointers.push_back(LB.getPointer());
7960           Sizes.push_back(
7961               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7962 
7963           // We need to add a pointer flag for each map that comes from the
7964           // same expression except for the first one. We also need to signal
7965           // this map is the first one that relates with the current capture
7966           // (there is a set of entries for each capture).
7967           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7968               MapType, MapModifiers, IsImplicit,
7969               !IsExpressionFirstInfo || RequiresReference,
7970               IsCaptureFirstInfo && !RequiresReference);
7971 
7972           if (!IsExpressionFirstInfo) {
7973             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7974             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7975             if (IsPointer)
7976               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
7977                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
7978 
7979             if (ShouldBeMemberOf) {
7980               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7981               // should be later updated with the correct value of MEMBER_OF.
7982               Flags |= OMP_MAP_MEMBER_OF;
7983               // From now on, all subsequent PTR_AND_OBJ entries should not be
7984               // marked as MEMBER_OF.
7985               ShouldBeMemberOf = false;
7986             }
7987           }
7988 
7989           Types.push_back(Flags);
7990         }
7991 
7992         // If we have encountered a member expression so far, keep track of the
7993         // mapped member. If the parent is "*this", then the value declaration
7994         // is nullptr.
7995         if (EncounteredME) {
7996           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7997           unsigned FieldIndex = FD->getFieldIndex();
7998 
7999           // Update info about the lowest and highest elements for this struct
8000           if (!PartialStruct.Base.isValid()) {
8001             PartialStruct.LowestElem = {FieldIndex, LB};
8002             PartialStruct.HighestElem = {FieldIndex, LB};
8003             PartialStruct.Base = BP;
8004           } else if (FieldIndex < PartialStruct.LowestElem.first) {
8005             PartialStruct.LowestElem = {FieldIndex, LB};
8006           } else if (FieldIndex > PartialStruct.HighestElem.first) {
8007             PartialStruct.HighestElem = {FieldIndex, LB};
8008           }
8009         }
8010 
8011         // If we have a final array section, we are done with this expression.
8012         if (IsFinalArraySection)
8013           break;
8014 
8015         // The pointer becomes the base for the next element.
8016         if (Next != CE)
8017           BP = LB;
8018 
8019         IsExpressionFirstInfo = false;
8020         IsCaptureFirstInfo = false;
8021       }
8022     }
8023   }
8024 
8025   /// Return the adjusted map modifiers if the declaration a capture refers to
8026   /// appears in a first-private clause. This is expected to be used only with
8027   /// directives that start with 'target'.
8028   MappableExprsHandler::OpenMPOffloadMappingFlags
8029   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8030     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8031 
8032     // A first private variable captured by reference will use only the
8033     // 'private ptr' and 'map to' flag. Return the right flags if the captured
8034     // declaration is known as first-private in this handler.
8035     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8036       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
8037           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
8038         return MappableExprsHandler::OMP_MAP_ALWAYS |
8039                MappableExprsHandler::OMP_MAP_TO;
8040       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8041         return MappableExprsHandler::OMP_MAP_TO |
8042                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8043       return MappableExprsHandler::OMP_MAP_PRIVATE |
8044              MappableExprsHandler::OMP_MAP_TO;
8045     }
8046     return MappableExprsHandler::OMP_MAP_TO |
8047            MappableExprsHandler::OMP_MAP_FROM;
8048   }
8049 
8050   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8051     // Rotate by getFlagMemberOffset() bits.
8052     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8053                                                   << getFlagMemberOffset());
8054   }
8055 
8056   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8057                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8058     // If the entry is PTR_AND_OBJ but has not been marked with the special
8059     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8060     // marked as MEMBER_OF.
8061     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8062         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8063       return;
8064 
8065     // Reset the placeholder value to prepare the flag for the assignment of the
8066     // proper MEMBER_OF value.
8067     Flags &= ~OMP_MAP_MEMBER_OF;
8068     Flags |= MemberOfFlag;
8069   }
8070 
8071   void getPlainLayout(const CXXRecordDecl *RD,
8072                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8073                       bool AsBase) const {
8074     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8075 
8076     llvm::StructType *St =
8077         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8078 
8079     unsigned NumElements = St->getNumElements();
8080     llvm::SmallVector<
8081         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8082         RecordLayout(NumElements);
8083 
8084     // Fill bases.
8085     for (const auto &I : RD->bases()) {
8086       if (I.isVirtual())
8087         continue;
8088       const auto *Base = I.getType()->getAsCXXRecordDecl();
8089       // Ignore empty bases.
8090       if (Base->isEmpty() || CGF.getContext()
8091                                  .getASTRecordLayout(Base)
8092                                  .getNonVirtualSize()
8093                                  .isZero())
8094         continue;
8095 
8096       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8097       RecordLayout[FieldIndex] = Base;
8098     }
8099     // Fill in virtual bases.
8100     for (const auto &I : RD->vbases()) {
8101       const auto *Base = I.getType()->getAsCXXRecordDecl();
8102       // Ignore empty bases.
8103       if (Base->isEmpty())
8104         continue;
8105       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8106       if (RecordLayout[FieldIndex])
8107         continue;
8108       RecordLayout[FieldIndex] = Base;
8109     }
8110     // Fill in all the fields.
8111     assert(!RD->isUnion() && "Unexpected union.");
8112     for (const auto *Field : RD->fields()) {
8113       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8114       // will fill in later.)
8115       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
8116         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8117         RecordLayout[FieldIndex] = Field;
8118       }
8119     }
8120     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8121              &Data : RecordLayout) {
8122       if (Data.isNull())
8123         continue;
8124       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
8125         getPlainLayout(Base, Layout, /*AsBase=*/true);
8126       else
8127         Layout.push_back(Data.get<const FieldDecl *>());
8128     }
8129   }
8130 
8131 public:
8132   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8133       : CurDir(&Dir), CGF(CGF) {
8134     // Extract firstprivate clause information.
8135     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8136       for (const auto *D : C->varlists())
8137         FirstPrivateDecls.try_emplace(
8138             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8139     // Extract device pointer clause information.
8140     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8141       for (auto L : C->component_lists())
8142         DevPointersMap[L.first].push_back(L.second);
8143   }
8144 
  /// Constructor for the declare mapper directive. Only the directive and the
  /// code generation function are stored; no clause information is collected
  /// here.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
8148 
8149   /// Generate code for the combined entry if we have a partially mapped struct
8150   /// and take care of the mapping flags of the arguments corresponding to
8151   /// individual struct members.
8152   void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
8153                          MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
8154                          MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
8155                          const StructRangeInfoTy &PartialStruct) const {
8156     // Base is the base of the struct
8157     BasePointers.push_back(PartialStruct.Base.getPointer());
8158     // Pointer is the address of the lowest element
8159     llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
8160     Pointers.push_back(LB);
8161     // Size is (addr of {highest+1} element) - (addr of lowest element)
8162     llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
8163     llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
8164     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8165     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8166     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
8167     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8168                                                   /*isSigned=*/false);
8169     Sizes.push_back(Size);
8170     // Map type is always TARGET_PARAM
8171     Types.push_back(OMP_MAP_TARGET_PARAM);
8172     // Remove TARGET_PARAM flag from the first element
8173     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
8174 
8175     // All other current entries will be MEMBER_OF the combined entry
8176     // (except for PTR_AND_OBJ entries which do not have a placeholder value
8177     // 0xFFFF in the MEMBER_OF field).
8178     OpenMPOffloadMappingFlags MemberOfFlag =
8179         getMemberOfFlag(BasePointers.size() - 1);
8180     for (auto &M : CurTypes)
8181       setCorrectMemberOfFlag(M, MemberOfFlag);
8182   }
8183 
  /// Generate all the base pointers, section pointers, sizes and map
  /// types for the component lists of the map, to and from clauses of the
  /// current executable directive, and for the items of its use_device_ptr
  /// clauses. Entries that relate with a use_device_ptr item carry the
  /// relevant declaration in their base pointer info and are flagged
  /// RETURN_PARAM.
  ///
  /// The handler must have been constructed for an executable directive.
  /// Results are appended to \p BasePointers, \p Pointers, \p Sizes and
  /// \p Types.
  void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
                       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                       MapFlagsArrayTy &Types) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses. Lists are keyed by the canonical declaration, or by nullptr
    // when there is no declaration.
    auto &&InfoGen = [&Info](
        const ValueDecl *D,
        OMPClauseMappableExprCommon::MappableExprComponentListRef L,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit) {
      const ValueDecl *VD =
          D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
      Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
                            IsImplicit);
    };

    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    // Map clauses keep their own map type and modifiers.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }
    // 'to' clauses are recorded as map type 'to' without modifiers.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }
    // 'from' clauses are recorded as map type 'from' without modifiers.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;

    for (const auto *C :
         CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
      for (const auto L : C->component_lists()) {
        assert(!L.second.empty() && "Not expecting empty list of components!");
        const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = L.second.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto CI = std::find_if(
              It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
                return MI.Components.back().getAssociatedDeclaration() == VD;
              });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          if (CI != It->second.end()) {
            CI->ReturnDevicePointer = true;
            continue;
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
                  /*ReturnDevicePointer=*/false, C->isImplicit());
          DeferredInfo[nullptr].emplace_back(IE, VD);
        } else {
          // Not a struct member: emit the zero-size entry right away, directly
          // into the output arrays, using the loaded pointer value as both
          // base pointer and pointer.
          llvm::Value *Ptr =
              CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
          BasePointers.emplace_back(Ptr, VD);
          Pointers.push_back(Ptr);
          Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
          Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
        }
      }
    }

    // Generate the entries declaration by declaration, in the order the
    // declarations were inserted into Info.
    for (const auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = true;

      // Temporary versions of arrays
      MapBaseValuesArrayTy CurBasePointers;
      MapValuesArrayTy CurPointers;
      MapValuesArrayTy CurSizes;
      MapFlagsArrayTy CurTypes;
      StructRangeInfoTy PartialStruct;

      for (const MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");

        // Remember the current base pointer index.
        unsigned CurrentBasePointersIdx = CurBasePointers.size();
        generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
                                     CurBasePointers, CurPointers, CurSizes,
                                     CurTypes, PartialStruct,
                                     IsFirstComponentList, L.IsImplicit);

        // If this entry relates with a device pointer, set the relevant
        // declaration and add the 'return pointer' flag. The first entry
        // generated for this component list is the one that gets annotated.
        if (L.ReturnDevicePointer) {
          assert(CurBasePointers.size() > CurrentBasePointersIdx &&
                 "Unexpected number of mapped base pointers.");

          const ValueDecl *RelevantVD =
              L.Components.back().getAssociatedDeclaration();
          assert(RelevantVD &&
                 "No relevant declaration related with device pointer??");

          CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
          CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
        }
        IsFirstComponentList = false;
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr.
      auto CI = DeferredInfo.find(M.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          // The base pointer is the address of the member pointer itself; the
          // pointer is the value loaded from it.
          llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
          llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
              this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
          CurBasePointers.emplace_back(BasePtr, L.VD);
          CurPointers.push_back(Ptr);
          CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
          // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
          // value MEMBER_OF=FFFF so that the entry is later updated with the
          // correct value of MEMBER_OF.
          CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                             OMP_MAP_MEMBER_OF);
        }
      }

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry. It goes
      // directly into the output arrays, ahead of the entries of this
      // declaration, which are appended right below.
      if (PartialStruct.Base.isValid())
        emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
                          PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      Types.append(CurTypes.begin(), CurTypes.end());
    }
  }
8362 
8363   /// Generate all the base pointers, section pointers, sizes and map types for
8364   /// the extracted map clauses of user-defined mapper.
8365   void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers,
8366                                 MapValuesArrayTy &Pointers,
8367                                 MapValuesArrayTy &Sizes,
8368                                 MapFlagsArrayTy &Types) const {
8369     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8370            "Expect a declare mapper directive");
8371     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8372     // We have to process the component lists that relate with the same
8373     // declaration in a single chunk so that we can generate the map flags
8374     // correctly. Therefore, we organize all lists in a map.
8375     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
8376 
8377     // Helper function to fill the information map for the different supported
8378     // clauses.
8379     auto &&InfoGen = [&Info](
8380         const ValueDecl *D,
8381         OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8382         OpenMPMapClauseKind MapType,
8383         ArrayRef<OpenMPMapModifierKind> MapModifiers,
8384         bool ReturnDevicePointer, bool IsImplicit) {
8385       const ValueDecl *VD =
8386           D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
8387       Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
8388                             IsImplicit);
8389     };
8390 
8391     for (const auto *C : CurMapperDir->clauselists()) {
8392       const auto *MC = cast<OMPMapClause>(C);
8393       for (const auto L : MC->component_lists()) {
8394         InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(),
8395                 /*ReturnDevicePointer=*/false, MC->isImplicit());
8396       }
8397     }
8398 
8399     for (const auto &M : Info) {
8400       // We need to know when we generate information for the first component
8401       // associated with a capture, because the mapping flags depend on it.
8402       bool IsFirstComponentList = true;
8403 
8404       // Temporary versions of arrays
8405       MapBaseValuesArrayTy CurBasePointers;
8406       MapValuesArrayTy CurPointers;
8407       MapValuesArrayTy CurSizes;
8408       MapFlagsArrayTy CurTypes;
8409       StructRangeInfoTy PartialStruct;
8410 
8411       for (const MapInfo &L : M.second) {
8412         assert(!L.Components.empty() &&
8413                "Not expecting declaration with no component lists.");
8414         generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
8415                                      CurBasePointers, CurPointers, CurSizes,
8416                                      CurTypes, PartialStruct,
8417                                      IsFirstComponentList, L.IsImplicit);
8418         IsFirstComponentList = false;
8419       }
8420 
8421       // If there is an entry in PartialStruct it means we have a struct with
8422       // individual members mapped. Emit an extra combined entry.
8423       if (PartialStruct.Base.isValid())
8424         emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8425                           PartialStruct);
8426 
8427       // We need to append the results of this capture to what we already have.
8428       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8429       Pointers.append(CurPointers.begin(), CurPointers.end());
8430       Sizes.append(CurSizes.begin(), CurSizes.end());
8431       Types.append(CurTypes.begin(), CurTypes.end());
8432     }
8433   }
8434 
8435   /// Emit capture info for lambdas for variables captured by reference.
8436   void generateInfoForLambdaCaptures(
8437       const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
8438       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
8439       MapFlagsArrayTy &Types,
8440       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8441     const auto *RD = VD->getType()
8442                          .getCanonicalType()
8443                          .getNonReferenceType()
8444                          ->getAsCXXRecordDecl();
8445     if (!RD || !RD->isLambda())
8446       return;
8447     Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
8448     LValue VDLVal = CGF.MakeAddrLValue(
8449         VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
8450     llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
8451     FieldDecl *ThisCapture = nullptr;
8452     RD->getCaptureFields(Captures, ThisCapture);
8453     if (ThisCapture) {
8454       LValue ThisLVal =
8455           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8456       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8457       LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8458                                  VDLVal.getPointer(CGF));
8459       BasePointers.push_back(ThisLVal.getPointer(CGF));
8460       Pointers.push_back(ThisLValVal.getPointer(CGF));
8461       Sizes.push_back(
8462           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8463                                     CGF.Int64Ty, /*isSigned=*/true));
8464       Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8465                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8466     }
8467     for (const LambdaCapture &LC : RD->captures()) {
8468       if (!LC.capturesVariable())
8469         continue;
8470       const VarDecl *VD = LC.getCapturedVar();
8471       if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8472         continue;
8473       auto It = Captures.find(VD);
8474       assert(It != Captures.end() && "Found lambda capture without field.");
8475       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8476       if (LC.getCaptureKind() == LCK_ByRef) {
8477         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8478         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8479                                    VDLVal.getPointer(CGF));
8480         BasePointers.push_back(VarLVal.getPointer(CGF));
8481         Pointers.push_back(VarLValVal.getPointer(CGF));
8482         Sizes.push_back(CGF.Builder.CreateIntCast(
8483             CGF.getTypeSize(
8484                 VD->getType().getCanonicalType().getNonReferenceType()),
8485             CGF.Int64Ty, /*isSigned=*/true));
8486       } else {
8487         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8488         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8489                                    VDLVal.getPointer(CGF));
8490         BasePointers.push_back(VarLVal.getPointer(CGF));
8491         Pointers.push_back(VarRVal.getScalarVal());
8492         Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8493       }
8494       Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8495                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8496     }
8497   }
8498 
8499   /// Set correct indices for lambdas captures.
8500   void adjustMemberOfForLambdaCaptures(
8501       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8502       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8503       MapFlagsArrayTy &Types) const {
8504     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8505       // Set correct member_of idx for all implicit lambda captures.
8506       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8507                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8508         continue;
8509       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8510       assert(BasePtr && "Unable to find base lambda address.");
8511       int TgtIdx = -1;
8512       for (unsigned J = I; J > 0; --J) {
8513         unsigned Idx = J - 1;
8514         if (Pointers[Idx] != BasePtr)
8515           continue;
8516         TgtIdx = Idx;
8517         break;
8518       }
8519       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8520       // All other current entries will be MEMBER_OF the combined entry
8521       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8522       // 0xFFFF in the MEMBER_OF field).
8523       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8524       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8525     }
8526   }
8527 
8528   /// Generate the base pointers, section pointers, sizes and map types
8529   /// associated to a given capture.
8530   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
8531                               llvm::Value *Arg,
8532                               MapBaseValuesArrayTy &BasePointers,
8533                               MapValuesArrayTy &Pointers,
8534                               MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
8535                               StructRangeInfoTy &PartialStruct) const {
8536     assert(!Cap->capturesVariableArrayType() &&
8537            "Not expecting to generate map info for a variable array type!");
8538 
    // We need to know when we are generating information for the first
    // component.
8540     const ValueDecl *VD = Cap->capturesThis()
8541                               ? nullptr
8542                               : Cap->getCapturedVar()->getCanonicalDecl();
8543 
8544     // If this declaration appears in a is_device_ptr clause we just have to
8545     // pass the pointer by value. If it is a reference to a declaration, we just
8546     // pass its value.
8547     if (DevPointersMap.count(VD)) {
8548       BasePointers.emplace_back(Arg, VD);
8549       Pointers.push_back(Arg);
8550       Sizes.push_back(
8551           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8552                                     CGF.Int64Ty, /*isSigned=*/true));
8553       Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
8554       return;
8555     }
8556 
8557     using MapData =
8558         std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
8559                    OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
8560     SmallVector<MapData, 4> DeclComponentLists;
8561     assert(CurDir.is<const OMPExecutableDirective *>() &&
8562            "Expect a executable directive");
8563     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8564     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8565       for (const auto L : C->decl_component_lists(VD)) {
8566         assert(L.first == VD &&
8567                "We got information for the wrong declaration??");
8568         assert(!L.second.empty() &&
8569                "Not expecting declaration with no component lists.");
8570         DeclComponentLists.emplace_back(L.second, C->getMapType(),
8571                                         C->getMapTypeModifiers(),
8572                                         C->isImplicit());
8573       }
8574     }
8575 
8576     // Find overlapping elements (including the offset from the base element).
8577     llvm::SmallDenseMap<
8578         const MapData *,
8579         llvm::SmallVector<
8580             OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
8581         4>
8582         OverlappedData;
8583     size_t Count = 0;
8584     for (const MapData &L : DeclComponentLists) {
8585       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8586       OpenMPMapClauseKind MapType;
8587       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8588       bool IsImplicit;
8589       std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
8590       ++Count;
8591       for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
8592         OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
8593         std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
8594         auto CI = Components.rbegin();
8595         auto CE = Components.rend();
8596         auto SI = Components1.rbegin();
8597         auto SE = Components1.rend();
8598         for (; CI != CE && SI != SE; ++CI, ++SI) {
8599           if (CI->getAssociatedExpression()->getStmtClass() !=
8600               SI->getAssociatedExpression()->getStmtClass())
8601             break;
8602           // Are we dealing with different variables/fields?
8603           if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
8604             break;
8605         }
8606         // Found overlapping if, at least for one component, reached the head of
8607         // the components list.
8608         if (CI == CE || SI == SE) {
8609           assert((CI != CE || SI != SE) &&
8610                  "Unexpected full match of the mapping components.");
8611           const MapData &BaseData = CI == CE ? L : L1;
8612           OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
8613               SI == SE ? Components : Components1;
8614           auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
8615           OverlappedElements.getSecond().push_back(SubData);
8616         }
8617       }
8618     }
8619     // Sort the overlapped elements for each item.
8620     llvm::SmallVector<const FieldDecl *, 4> Layout;
8621     if (!OverlappedData.empty()) {
8622       if (const auto *CRD =
8623               VD->getType().getCanonicalType()->getAsCXXRecordDecl())
8624         getPlainLayout(CRD, Layout, /*AsBase=*/false);
8625       else {
8626         const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
8627         Layout.append(RD->field_begin(), RD->field_end());
8628       }
8629     }
8630     for (auto &Pair : OverlappedData) {
8631       llvm::sort(
8632           Pair.getSecond(),
8633           [&Layout](
8634               OMPClauseMappableExprCommon::MappableExprComponentListRef First,
8635               OMPClauseMappableExprCommon::MappableExprComponentListRef
8636                   Second) {
8637             auto CI = First.rbegin();
8638             auto CE = First.rend();
8639             auto SI = Second.rbegin();
8640             auto SE = Second.rend();
8641             for (; CI != CE && SI != SE; ++CI, ++SI) {
8642               if (CI->getAssociatedExpression()->getStmtClass() !=
8643                   SI->getAssociatedExpression()->getStmtClass())
8644                 break;
8645               // Are we dealing with different variables/fields?
8646               if (CI->getAssociatedDeclaration() !=
8647                   SI->getAssociatedDeclaration())
8648                 break;
8649             }
8650 
8651             // Lists contain the same elements.
8652             if (CI == CE && SI == SE)
8653               return false;
8654 
8655             // List with less elements is less than list with more elements.
8656             if (CI == CE || SI == SE)
8657               return CI == CE;
8658 
8659             const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
8660             const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
8661             if (FD1->getParent() == FD2->getParent())
8662               return FD1->getFieldIndex() < FD2->getFieldIndex();
8663             const auto It =
8664                 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
8665                   return FD == FD1 || FD == FD2;
8666                 });
8667             return *It == FD1;
8668           });
8669     }
8670 
8671     // Associated with a capture, because the mapping flags depend on it.
8672     // Go through all of the elements with the overlapped elements.
8673     for (const auto &Pair : OverlappedData) {
8674       const MapData &L = *Pair.getFirst();
8675       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8676       OpenMPMapClauseKind MapType;
8677       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8678       bool IsImplicit;
8679       std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
8680       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
8681           OverlappedComponents = Pair.getSecond();
8682       bool IsFirstComponentList = true;
8683       generateInfoForComponentList(MapType, MapModifiers, Components,
8684                                    BasePointers, Pointers, Sizes, Types,
8685                                    PartialStruct, IsFirstComponentList,
8686                                    IsImplicit, OverlappedComponents);
8687     }
8688     // Go through other elements without overlapped elements.
8689     bool IsFirstComponentList = OverlappedData.empty();
8690     for (const MapData &L : DeclComponentLists) {
8691       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8692       OpenMPMapClauseKind MapType;
8693       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8694       bool IsImplicit;
8695       std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
8696       auto It = OverlappedData.find(&L);
8697       if (It == OverlappedData.end())
8698         generateInfoForComponentList(MapType, MapModifiers, Components,
8699                                      BasePointers, Pointers, Sizes, Types,
8700                                      PartialStruct, IsFirstComponentList,
8701                                      IsImplicit);
8702       IsFirstComponentList = false;
8703     }
8704   }
8705 
8706   /// Generate the base pointers, section pointers, sizes and map types
8707   /// associated with the declare target link variables.
8708   void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
8709                                         MapValuesArrayTy &Pointers,
8710                                         MapValuesArrayTy &Sizes,
8711                                         MapFlagsArrayTy &Types) const {
8712     assert(CurDir.is<const OMPExecutableDirective *>() &&
8713            "Expect a executable directive");
8714     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8715     // Map other list items in the map clause which are not captured variables
8716     // but "declare target link" global variables.
8717     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8718       for (const auto L : C->component_lists()) {
8719         if (!L.first)
8720           continue;
8721         const auto *VD = dyn_cast<VarDecl>(L.first);
8722         if (!VD)
8723           continue;
8724         llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
8725             OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
8726         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8727             !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
8728           continue;
8729         StructRangeInfoTy PartialStruct;
8730         generateInfoForComponentList(
8731             C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
8732             Pointers, Sizes, Types, PartialStruct,
8733             /*IsFirstComponentList=*/true, C->isImplicit());
8734         assert(!PartialStruct.Base.isValid() &&
8735                "No partial structs for declare target link expected.");
8736       }
8737     }
8738   }
8739 
8740   /// Generate the default map information for a given capture \a CI,
8741   /// record field declaration \a RI and captured value \a CV.
8742   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
8743                               const FieldDecl &RI, llvm::Value *CV,
8744                               MapBaseValuesArrayTy &CurBasePointers,
8745                               MapValuesArrayTy &CurPointers,
8746                               MapValuesArrayTy &CurSizes,
8747                               MapFlagsArrayTy &CurMapTypes) const {
8748     bool IsImplicit = true;
8749     // Do the default mapping.
8750     if (CI.capturesThis()) {
8751       CurBasePointers.push_back(CV);
8752       CurPointers.push_back(CV);
8753       const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
8754       CurSizes.push_back(
8755           CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
8756                                     CGF.Int64Ty, /*isSigned=*/true));
8757       // Default map type.
8758       CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
8759     } else if (CI.capturesVariableByCopy()) {
8760       CurBasePointers.push_back(CV);
8761       CurPointers.push_back(CV);
8762       if (!RI.getType()->isAnyPointerType()) {
8763         // We have to signal to the runtime captures passed by value that are
8764         // not pointers.
8765         CurMapTypes.push_back(OMP_MAP_LITERAL);
8766         CurSizes.push_back(CGF.Builder.CreateIntCast(
8767             CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
8768       } else {
8769         // Pointers are implicitly mapped with a zero size and no flags
8770         // (other than first map that is added for all implicit maps).
8771         CurMapTypes.push_back(OMP_MAP_NONE);
8772         CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8773       }
8774       const VarDecl *VD = CI.getCapturedVar();
8775       auto I = FirstPrivateDecls.find(VD);
8776       if (I != FirstPrivateDecls.end())
8777         IsImplicit = I->getSecond();
8778     } else {
8779       assert(CI.capturesVariable() && "Expected captured reference.");
8780       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
8781       QualType ElementType = PtrTy->getPointeeType();
8782       CurSizes.push_back(CGF.Builder.CreateIntCast(
8783           CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
8784       // The default map type for a scalar/complex type is 'to' because by
8785       // default the value doesn't have to be retrieved. For an aggregate
8786       // type, the default is 'tofrom'.
8787       CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
8788       const VarDecl *VD = CI.getCapturedVar();
8789       auto I = FirstPrivateDecls.find(VD);
8790       if (I != FirstPrivateDecls.end() &&
8791           VD->getType().isConstant(CGF.getContext())) {
8792         llvm::Constant *Addr =
8793             CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
8794         // Copy the value of the original variable to the new global copy.
8795         CGF.Builder.CreateMemCpy(
8796             CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
8797             Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
8798             CurSizes.back(), /*IsVolatile=*/false);
8799         // Use new global variable as the base pointers.
8800         CurBasePointers.push_back(Addr);
8801         CurPointers.push_back(Addr);
8802       } else {
8803         CurBasePointers.push_back(CV);
8804         if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
8805           Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
8806               CV, ElementType, CGF.getContext().getDeclAlign(VD),
8807               AlignmentSource::Decl));
8808           CurPointers.push_back(PtrAddr.getPointer());
8809         } else {
8810           CurPointers.push_back(CV);
8811         }
8812       }
8813       if (I != FirstPrivateDecls.end())
8814         IsImplicit = I->getSecond();
8815     }
8816     // Every default map produces a single argument which is a target parameter.
8817     CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;
8818 
8819     // Add flag stating this is an implicit map.
8820     if (IsImplicit)
8821       CurMapTypes.back() |= OMP_MAP_IMPLICIT;
8822   }
8823 };
8824 } // anonymous namespace
8825 
8826 /// Emit the arrays used to pass the captures and map information to the
8827 /// offloading runtime library. If there is no map or capture information,
8828 /// return nullptr by reference.
8829 static void
8830 emitOffloadingArrays(CodeGenFunction &CGF,
8831                      MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
8832                      MappableExprsHandler::MapValuesArrayTy &Pointers,
8833                      MappableExprsHandler::MapValuesArrayTy &Sizes,
8834                      MappableExprsHandler::MapFlagsArrayTy &MapTypes,
8835                      CGOpenMPRuntime::TargetDataInfo &Info) {
8836   CodeGenModule &CGM = CGF.CGM;
8837   ASTContext &Ctx = CGF.getContext();
8838 
8839   // Reset the array information.
8840   Info.clearArrayInfo();
8841   Info.NumberOfPtrs = BasePointers.size();
8842 
8843   if (Info.NumberOfPtrs) {
8844     // Detect if we have any capture size requiring runtime evaluation of the
8845     // size so that a constant array could be eventually used.
8846     bool hasRuntimeEvaluationCaptureSize = false;
8847     for (llvm::Value *S : Sizes)
8848       if (!isa<llvm::Constant>(S)) {
8849         hasRuntimeEvaluationCaptureSize = true;
8850         break;
8851       }
8852 
8853     llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
8854     QualType PointerArrayType = Ctx.getConstantArrayType(
8855         Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
8856         /*IndexTypeQuals=*/0);
8857 
8858     Info.BasePointersArray =
8859         CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
8860     Info.PointersArray =
8861         CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
8862 
8863     // If we don't have any VLA types or other types that require runtime
8864     // evaluation, we can use a constant array for the map sizes, otherwise we
8865     // need to fill up the arrays as we do for the pointers.
8866     QualType Int64Ty =
8867         Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
8868     if (hasRuntimeEvaluationCaptureSize) {
8869       QualType SizeArrayType = Ctx.getConstantArrayType(
8870           Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
8871           /*IndexTypeQuals=*/0);
8872       Info.SizesArray =
8873           CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
8874     } else {
8875       // We expect all the sizes to be constant, so we collect them to create
8876       // a constant array.
8877       SmallVector<llvm::Constant *, 16> ConstSizes;
8878       for (llvm::Value *S : Sizes)
8879         ConstSizes.push_back(cast<llvm::Constant>(S));
8880 
8881       auto *SizesArrayInit = llvm::ConstantArray::get(
8882           llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
8883       std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
8884       auto *SizesArrayGbl = new llvm::GlobalVariable(
8885           CGM.getModule(), SizesArrayInit->getType(),
8886           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8887           SizesArrayInit, Name);
8888       SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8889       Info.SizesArray = SizesArrayGbl;
8890     }
8891 
8892     // The map types are always constant so we don't need to generate code to
8893     // fill arrays. Instead, we create an array constant.
8894     SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
8895     llvm::copy(MapTypes, Mapping.begin());
8896     llvm::Constant *MapTypesArrayInit =
8897         llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
8898     std::string MaptypesName =
8899         CGM.getOpenMPRuntime().getName({"offload_maptypes"});
8900     auto *MapTypesArrayGbl = new llvm::GlobalVariable(
8901         CGM.getModule(), MapTypesArrayInit->getType(),
8902         /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8903         MapTypesArrayInit, MaptypesName);
8904     MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8905     Info.MapTypesArray = MapTypesArrayGbl;
8906 
8907     for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
8908       llvm::Value *BPVal = *BasePointers[I];
8909       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
8910           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8911           Info.BasePointersArray, 0, I);
8912       BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8913           BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
8914       Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8915       CGF.Builder.CreateStore(BPVal, BPAddr);
8916 
8917       if (Info.requiresDevicePointerInfo())
8918         if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
8919           Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
8920 
8921       llvm::Value *PVal = Pointers[I];
8922       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
8923           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8924           Info.PointersArray, 0, I);
8925       P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8926           P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
8927       Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8928       CGF.Builder.CreateStore(PVal, PAddr);
8929 
8930       if (hasRuntimeEvaluationCaptureSize) {
8931         llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
8932             llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8933             Info.SizesArray,
8934             /*Idx0=*/0,
8935             /*Idx1=*/I);
8936         Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
8937         CGF.Builder.CreateStore(
8938             CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true),
8939             SAddr);
8940       }
8941     }
8942   }
8943 }
8944 
8945 /// Emit the arguments to be passed to the runtime library based on the
8946 /// arrays of pointers, sizes and map types.
8947 static void emitOffloadingArraysArgument(
8948     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
8949     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
8950     llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
8951   CodeGenModule &CGM = CGF.CGM;
8952   if (Info.NumberOfPtrs) {
8953     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8954         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8955         Info.BasePointersArray,
8956         /*Idx0=*/0, /*Idx1=*/0);
8957     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8958         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8959         Info.PointersArray,
8960         /*Idx0=*/0,
8961         /*Idx1=*/0);
8962     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8963         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
8964         /*Idx0=*/0, /*Idx1=*/0);
8965     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8966         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8967         Info.MapTypesArray,
8968         /*Idx0=*/0,
8969         /*Idx1=*/0);
8970   } else {
8971     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8972     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8973     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8974     MapTypesArrayArg =
8975         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8976   }
8977 }
8978 
8979 /// Check for inner distribute directive.
8980 static const OMPExecutableDirective *
8981 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
8982   const auto *CS = D.getInnermostCapturedStmt();
8983   const auto *Body =
8984       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
8985   const Stmt *ChildStmt =
8986       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8987 
8988   if (const auto *NestedDir =
8989           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8990     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
8991     switch (D.getDirectiveKind()) {
8992     case OMPD_target:
8993       if (isOpenMPDistributeDirective(DKind))
8994         return NestedDir;
8995       if (DKind == OMPD_teams) {
8996         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
8997             /*IgnoreCaptured=*/true);
8998         if (!Body)
8999           return nullptr;
9000         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9001         if (const auto *NND =
9002                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9003           DKind = NND->getDirectiveKind();
9004           if (isOpenMPDistributeDirective(DKind))
9005             return NND;
9006         }
9007       }
9008       return nullptr;
9009     case OMPD_target_teams:
9010       if (isOpenMPDistributeDirective(DKind))
9011         return NestedDir;
9012       return nullptr;
9013     case OMPD_target_parallel:
9014     case OMPD_target_simd:
9015     case OMPD_target_parallel_for:
9016     case OMPD_target_parallel_for_simd:
9017       return nullptr;
9018     case OMPD_target_teams_distribute:
9019     case OMPD_target_teams_distribute_simd:
9020     case OMPD_target_teams_distribute_parallel_for:
9021     case OMPD_target_teams_distribute_parallel_for_simd:
9022     case OMPD_parallel:
9023     case OMPD_for:
9024     case OMPD_parallel_for:
9025     case OMPD_parallel_master:
9026     case OMPD_parallel_sections:
9027     case OMPD_for_simd:
9028     case OMPD_parallel_for_simd:
9029     case OMPD_cancel:
9030     case OMPD_cancellation_point:
9031     case OMPD_ordered:
9032     case OMPD_threadprivate:
9033     case OMPD_allocate:
9034     case OMPD_task:
9035     case OMPD_simd:
9036     case OMPD_sections:
9037     case OMPD_section:
9038     case OMPD_single:
9039     case OMPD_master:
9040     case OMPD_critical:
9041     case OMPD_taskyield:
9042     case OMPD_barrier:
9043     case OMPD_taskwait:
9044     case OMPD_taskgroup:
9045     case OMPD_atomic:
9046     case OMPD_flush:
9047     case OMPD_depobj:
9048     case OMPD_teams:
9049     case OMPD_target_data:
9050     case OMPD_target_exit_data:
9051     case OMPD_target_enter_data:
9052     case OMPD_distribute:
9053     case OMPD_distribute_simd:
9054     case OMPD_distribute_parallel_for:
9055     case OMPD_distribute_parallel_for_simd:
9056     case OMPD_teams_distribute:
9057     case OMPD_teams_distribute_simd:
9058     case OMPD_teams_distribute_parallel_for:
9059     case OMPD_teams_distribute_parallel_for_simd:
9060     case OMPD_target_update:
9061     case OMPD_declare_simd:
9062     case OMPD_declare_variant:
9063     case OMPD_declare_target:
9064     case OMPD_end_declare_target:
9065     case OMPD_declare_reduction:
9066     case OMPD_declare_mapper:
9067     case OMPD_taskloop:
9068     case OMPD_taskloop_simd:
9069     case OMPD_master_taskloop:
9070     case OMPD_master_taskloop_simd:
9071     case OMPD_parallel_master_taskloop:
9072     case OMPD_parallel_master_taskloop_simd:
9073     case OMPD_requires:
9074     case OMPD_unknown:
9075       llvm_unreachable("Unexpected directive.");
9076     }
9077   }
9078 
9079   return nullptr;
9080 }
9081 
9082 /// Emit the user-defined mapper function. The code generation follows the
9083 /// pattern in the example below.
9084 /// \code
9085 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9086 ///                                           void *base, void *begin,
9087 ///                                           int64_t size, int64_t type) {
9088 ///   // Allocate space for an array section first.
9089 ///   if (size > 1 && !maptype.IsDelete)
9090 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9091 ///                                 size*sizeof(Ty), clearToFrom(type));
9092 ///   // Map members.
9093 ///   for (unsigned i = 0; i < size; i++) {
9094 ///     // For each component specified by this mapper:
9095 ///     for (auto c : all_components) {
9096 ///       if (c.hasMapper())
9097 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9098 ///                       c.arg_type);
9099 ///       else
9100 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9101 ///                                     c.arg_begin, c.arg_size, c.arg_type);
9102 ///     }
9103 ///   }
9104 ///   // Delete the array section.
9105 ///   if (size > 1 && maptype.IsDelete)
9106 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9107 ///                                 size*sizeof(Ty), clearToFrom(type));
9108 /// }
9109 /// \endcode
9110 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9111                                             CodeGenFunction *CGF) {
9112   if (UDMMap.count(D) > 0)
9113     return;
9114   ASTContext &C = CGM.getContext();
9115   QualType Ty = D->getType();
9116   QualType PtrTy = C.getPointerType(Ty).withRestrict();
9117   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
9118   auto *MapperVarDecl =
9119       cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9120   SourceLocation Loc = D->getLocation();
9121   CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9122 
9123   // Prepare mapper function arguments and attributes.
9124   ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9125                               C.VoidPtrTy, ImplicitParamDecl::Other);
9126   ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9127                             ImplicitParamDecl::Other);
9128   ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9129                              C.VoidPtrTy, ImplicitParamDecl::Other);
9130   ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9131                             ImplicitParamDecl::Other);
9132   ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9133                             ImplicitParamDecl::Other);
9134   FunctionArgList Args;
9135   Args.push_back(&HandleArg);
9136   Args.push_back(&BaseArg);
9137   Args.push_back(&BeginArg);
9138   Args.push_back(&SizeArg);
9139   Args.push_back(&TypeArg);
9140   const CGFunctionInfo &FnInfo =
9141       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
9142   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
9143   SmallString<64> TyStr;
9144   llvm::raw_svector_ostream Out(TyStr);
9145   CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
9146   std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9147   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
9148                                     Name, &CGM.getModule());
9149   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
9150   Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
9151   // Start the mapper function code generation.
9152   CodeGenFunction MapperCGF(CGM);
9153   MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
9154   // Compute the starting and end addreses of array elements.
9155   llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
9156       MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
9157       C.getPointerType(Int64Ty), Loc);
9158   llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
9159       MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
9160       CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
9161   llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
9162   llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
9163       MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
9164       C.getPointerType(Int64Ty), Loc);
9165   // Prepare common arguments for array initiation and deletion.
9166   llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
9167       MapperCGF.GetAddrOfLocalVar(&HandleArg),
9168       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9169   llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
9170       MapperCGF.GetAddrOfLocalVar(&BaseArg),
9171       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9172   llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
9173       MapperCGF.GetAddrOfLocalVar(&BeginArg),
9174       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9175 
9176   // Emit array initiation if this is an array section and \p MapType indicates
9177   // that memory allocation is required.
9178   llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
9179   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9180                              ElementSize, HeadBB, /*IsInit=*/true);
9181 
9182   // Emit a for loop to iterate through SizeArg of elements and map all of them.
9183 
9184   // Emit the loop header block.
9185   MapperCGF.EmitBlock(HeadBB);
9186   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
9187   llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
9188   // Evaluate whether the initial condition is satisfied.
9189   llvm::Value *IsEmpty =
9190       MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
9191   MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9192   llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
9193 
9194   // Emit the loop body block.
9195   MapperCGF.EmitBlock(BodyBB);
9196   llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
9197       PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
9198   PtrPHI->addIncoming(PtrBegin, EntryBB);
9199   Address PtrCurrent =
9200       Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
9201                           .getAlignment()
9202                           .alignmentOfArrayElement(ElementSize));
9203   // Privatize the declared variable of mapper to be the current array element.
9204   CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9205   Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
9206     return MapperCGF
9207         .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
9208         .getAddress(MapperCGF);
9209   });
9210   (void)Scope.Privatize();
9211 
9212   // Get map clause information. Fill up the arrays with all mapped variables.
9213   MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
9214   MappableExprsHandler::MapValuesArrayTy Pointers;
9215   MappableExprsHandler::MapValuesArrayTy Sizes;
9216   MappableExprsHandler::MapFlagsArrayTy MapTypes;
9217   MappableExprsHandler MEHandler(*D, MapperCGF);
9218   MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes);
9219 
9220   // Call the runtime API __tgt_mapper_num_components to get the number of
9221   // pre-existing components.
9222   llvm::Value *OffloadingArgs[] = {Handle};
9223   llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
9224       createRuntimeFunction(OMPRTL__tgt_mapper_num_components), OffloadingArgs);
9225   llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
9226       PreviousSize,
9227       MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
9228 
9229   // Fill up the runtime mapper handle for all components.
9230   for (unsigned I = 0; I < BasePointers.size(); ++I) {
9231     llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
9232         *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9233     llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
9234         Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9235     llvm::Value *CurSizeArg = Sizes[I];
9236 
9237     // Extract the MEMBER_OF field from the map type.
9238     llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
9239     MapperCGF.EmitBlock(MemberBB);
9240     llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]);
9241     llvm::Value *Member = MapperCGF.Builder.CreateAnd(
9242         OriMapType,
9243         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
9244     llvm::BasicBlock *MemberCombineBB =
9245         MapperCGF.createBasicBlock("omp.member.combine");
9246     llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
9247     llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
9248     MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
9249     // Add the number of pre-existing components to the MEMBER_OF field if it
9250     // is valid.
9251     MapperCGF.EmitBlock(MemberCombineBB);
9252     llvm::Value *CombinedMember =
9253         MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9254     // Do nothing if it is not a member of previous components.
9255     MapperCGF.EmitBlock(TypeBB);
9256     llvm::PHINode *MemberMapType =
9257         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
9258     MemberMapType->addIncoming(OriMapType, MemberBB);
9259     MemberMapType->addIncoming(CombinedMember, MemberCombineBB);
9260 
9261     // Combine the map type inherited from user-defined mapper with that
9262     // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
9263     // bits of the \a MapType, which is the input argument of the mapper
9264     // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
9265     // bits of MemberMapType.
9266     // [OpenMP 5.0], 1.2.6. map-type decay.
9267     //        | alloc |  to   | from  | tofrom | release | delete
9268     // ----------------------------------------------------------
9269     // alloc  | alloc | alloc | alloc | alloc  | release | delete
9270     // to     | alloc |  to   | alloc |   to   | release | delete
9271     // from   | alloc | alloc | from  |  from  | release | delete
9272     // tofrom | alloc |  to   | from  | tofrom | release | delete
9273     llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
9274         MapType,
9275         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
9276                                    MappableExprsHandler::OMP_MAP_FROM));
9277     llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
9278     llvm::BasicBlock *AllocElseBB =
9279         MapperCGF.createBasicBlock("omp.type.alloc.else");
9280     llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
9281     llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
9282     llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
9283     llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
9284     llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
9285     MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9286     // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
9287     MapperCGF.EmitBlock(AllocBB);
9288     llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
9289         MemberMapType,
9290         MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9291                                      MappableExprsHandler::OMP_MAP_FROM)));
9292     MapperCGF.Builder.CreateBr(EndBB);
9293     MapperCGF.EmitBlock(AllocElseBB);
9294     llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
9295         LeftToFrom,
9296         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
9297     MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9298     // In case of to, clear OMP_MAP_FROM.
9299     MapperCGF.EmitBlock(ToBB);
9300     llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
9301         MemberMapType,
9302         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
9303     MapperCGF.Builder.CreateBr(EndBB);
9304     MapperCGF.EmitBlock(ToElseBB);
9305     llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
9306         LeftToFrom,
9307         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
9308     MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9309     // In case of from, clear OMP_MAP_TO.
9310     MapperCGF.EmitBlock(FromBB);
9311     llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
9312         MemberMapType,
9313         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
9314     // In case of tofrom, do nothing.
9315     MapperCGF.EmitBlock(EndBB);
9316     llvm::PHINode *CurMapType =
9317         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
9318     CurMapType->addIncoming(AllocMapType, AllocBB);
9319     CurMapType->addIncoming(ToMapType, ToBB);
9320     CurMapType->addIncoming(FromMapType, FromBB);
9321     CurMapType->addIncoming(MemberMapType, ToElseBB);
9322 
9323     // TODO: call the corresponding mapper function if a user-defined mapper is
9324     // associated with this map clause.
9325     // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9326     // data structure.
9327     llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
9328                                      CurSizeArg, CurMapType};
9329     MapperCGF.EmitRuntimeCall(
9330         createRuntimeFunction(OMPRTL__tgt_push_mapper_component),
9331         OffloadingArgs);
9332   }
9333 
9334   // Update the pointer to point to the next element that needs to be mapped,
9335   // and check whether we have mapped all elements.
9336   llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
9337       PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
9338   PtrPHI->addIncoming(PtrNext, BodyBB);
9339   llvm::Value *IsDone =
9340       MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9341   llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
9342   MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9343 
9344   MapperCGF.EmitBlock(ExitBB);
9345   // Emit array deletion if this is an array section and \p MapType indicates
9346   // that deletion is required.
9347   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9348                              ElementSize, DoneBB, /*IsInit=*/false);
9349 
9350   // Emit the function exit block.
9351   MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9352   MapperCGF.FinishFunction();
9353   UDMMap.try_emplace(D, Fn);
9354   if (CGF) {
9355     auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
9356     Decls.second.push_back(D);
9357   }
9358 }
9359 
9360 /// Emit the array initialization or deletion portion for user-defined mapper
9361 /// code generation. First, it evaluates whether an array section is mapped and
9362 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9363 /// true, and \a MapType indicates to not delete this array, array
9364 /// initialization code is generated. If \a IsInit is false, and \a MapType
9365 /// indicates to not this array, array deletion code is generated.
9366 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9367     CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9368     llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9369     CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
9370   StringRef Prefix = IsInit ? ".init" : ".del";
9371 
9372   // Evaluate if this is an array section.
9373   llvm::BasicBlock *IsDeleteBB =
9374       MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"}));
9375   llvm::BasicBlock *BodyBB =
9376       MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
9377   llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
9378       Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9379   MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);
9380 
9381   // Evaluate if we are going to delete this section.
9382   MapperCGF.EmitBlock(IsDeleteBB);
9383   llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9384       MapType,
9385       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
9386   llvm::Value *DeleteCond;
9387   if (IsInit) {
9388     DeleteCond = MapperCGF.Builder.CreateIsNull(
9389         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9390   } else {
9391     DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9392         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9393   }
9394   MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);
9395 
9396   MapperCGF.EmitBlock(BodyBB);
9397   // Get the array size by multiplying element size and element number (i.e., \p
9398   // Size).
9399   llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9400       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9401   // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
9402   // memory allocation/deletion purpose only.
9403   llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
9404       MapType,
9405       MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9406                                    MappableExprsHandler::OMP_MAP_FROM)));
9407   // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9408   // data structure.
9409   llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
9410   MapperCGF.EmitRuntimeCall(
9411       createRuntimeFunction(OMPRTL__tgt_push_mapper_component), OffloadingArgs);
9412 }
9413 
9414 void CGOpenMPRuntime::emitTargetNumIterationsCall(
9415     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9416     llvm::Value *DeviceID,
9417     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9418                                      const OMPLoopDirective &D)>
9419         SizeEmitter) {
9420   OpenMPDirectiveKind Kind = D.getDirectiveKind();
9421   const OMPExecutableDirective *TD = &D;
9422   // Get nested teams distribute kind directive, if any.
9423   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
9424     TD = getNestedDistributeDirective(CGM.getContext(), D);
9425   if (!TD)
9426     return;
9427   const auto *LD = cast<OMPLoopDirective>(TD);
9428   auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF,
9429                                                      PrePostActionTy &) {
9430     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
9431       llvm::Value *Args[] = {DeviceID, NumIterations};
9432       CGF.EmitRuntimeCall(
9433           createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
9434     }
9435   };
9436   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
9437 }
9438 
/// Emit the host-side code that launches the target region of directive \a D.
///
/// The captured variables of the 'target' captured statement are generated
/// first. If \a OutlinedFnID is non-null, offloading code is emitted: the
/// offloading arrays are filled, one of the __tgt_target* runtime entry points
/// is called, and a non-null return value from that call branches to a
/// fallback block that calls \a OutlinedFn on the host. When \a IfCond is
/// given, emitIfClause selects between that offloading path and a direct host
/// call of \a OutlinedFn. If \a OutlinedFnID is null, only the host call is
/// emitted.
///
/// If \a D has a 'depend' clause, the generated code is wrapped through
/// EmitOMPTargetTaskBasedDirective and the captured variables are regenerated
/// before each host call of \a OutlinedFn.
///
/// \param CGF Function in which the code is emitted; nothing is emitted if it
/// has no insert point.
/// \param D The target directive being emitted.
/// \param OutlinedFn Host version of the target region; must not be null.
/// \param OutlinedFnID Identifier of the target region passed to the runtime,
/// or null if offloading is not supported.
/// \param IfCond Expression of the 'if' clause, or null.
/// \param Device Expression of the 'device' clause, or null.
/// \param SizeEmitter Callback forwarded to emitTargetNumIterationsCall to
/// compute the loop iteration count.
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    const Expr *Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // A 'depend' clause makes the target region go through the task-based
  // emission path below.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  // Generate the captured variables once up front; the lambdas below share
  // CapturedVars by reference.
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // InputInfo and MapTypesArray are filled in by TargetThenGen and then read
  // by ThenGen.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    // A 'nowait' clause selects the *_nowait variants of the runtime calls.
    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
                                          : OMPRTL__tgt_target_teams),
          OffloadingArgs);
    } else {
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
                                          : OMPRTL__tgt_target),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    // A non-null return value is treated as failure.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    // On the task-based path the captured variables are regenerated with the
    // CodeGenFunction this lambda receives.
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Collect the map information for every capture, emit the offloading
  // arrays, publish them through InputInfo/MapTypesArray and then run ThenGen.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;

    // Walk the captures, the fields of the captured record and the generated
    // captured values in lock-step.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
      MappableExprsHandler::MapValuesArrayTy CurPointers;
      MappableExprsHandler::MapValuesArrayTy CurSizes;
      MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurBasePointers.push_back(*CV);
        CurPointers.push_back(*CV);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                              MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                              MappableExprsHandler::OMP_MAP_IMPLICIT);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
                                         CurSizes, CurMapTypes, PartialStruct);
        if (CurBasePointers.empty())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
                                           CurPointers, CurSizes, CurMapTypes);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(
              CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
              CurMapTypes, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert(!CurBasePointers.empty() &&
             "Non-existing map pointer for capture!");
      assert(CurBasePointers.size() == CurPointers.size() &&
             CurBasePointers.size() == CurSizes.size() &&
             CurBasePointers.size() == CurMapTypes.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
                                    CurMapTypes, PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
                                              Pointers, MapTypes);
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
                                               MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Hand the emitted arrays over to ThenGen through the shared state.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // Host-only path: run ElseGen, wrapped through the task-based emission when
  // a 'depend' clause is present (with a fresh, default-constructed InputInfo).
  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user does not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
9716 
9717 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
9718                                                     StringRef ParentName) {
9719   if (!S)
9720     return;
9721 
9722   // Codegen OMP target directives that offload compute to the device.
9723   bool RequiresDeviceCodegen =
9724       isa<OMPExecutableDirective>(S) &&
9725       isOpenMPTargetExecutionDirective(
9726           cast<OMPExecutableDirective>(S)->getDirectiveKind());
9727 
9728   if (RequiresDeviceCodegen) {
9729     const auto &E = *cast<OMPExecutableDirective>(S);
9730     unsigned DeviceID;
9731     unsigned FileID;
9732     unsigned Line;
9733     getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
9734                              FileID, Line);
9735 
9736     // Is this a target region that should not be emitted as an entry point? If
9737     // so just signal we are done with this target region.
9738     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
9739                                                             ParentName, Line))
9740       return;
9741 
9742     switch (E.getDirectiveKind()) {
9743     case OMPD_target:
9744       CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
9745                                                    cast<OMPTargetDirective>(E));
9746       break;
9747     case OMPD_target_parallel:
9748       CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
9749           CGM, ParentName, cast<OMPTargetParallelDirective>(E));
9750       break;
9751     case OMPD_target_teams:
9752       CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
9753           CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
9754       break;
9755     case OMPD_target_teams_distribute:
9756       CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
9757           CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
9758       break;
9759     case OMPD_target_teams_distribute_simd:
9760       CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
9761           CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
9762       break;
9763     case OMPD_target_parallel_for:
9764       CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
9765           CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
9766       break;
9767     case OMPD_target_parallel_for_simd:
9768       CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
9769           CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
9770       break;
9771     case OMPD_target_simd:
9772       CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
9773           CGM, ParentName, cast<OMPTargetSimdDirective>(E));
9774       break;
9775     case OMPD_target_teams_distribute_parallel_for:
9776       CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
9777           CGM, ParentName,
9778           cast<OMPTargetTeamsDistributeParallelForDirective>(E));
9779       break;
9780     case OMPD_target_teams_distribute_parallel_for_simd:
9781       CodeGenFunction::
9782           EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
9783               CGM, ParentName,
9784               cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
9785       break;
9786     case OMPD_parallel:
9787     case OMPD_for:
9788     case OMPD_parallel_for:
9789     case OMPD_parallel_master:
9790     case OMPD_parallel_sections:
9791     case OMPD_for_simd:
9792     case OMPD_parallel_for_simd:
9793     case OMPD_cancel:
9794     case OMPD_cancellation_point:
9795     case OMPD_ordered:
9796     case OMPD_threadprivate:
9797     case OMPD_allocate:
9798     case OMPD_task:
9799     case OMPD_simd:
9800     case OMPD_sections:
9801     case OMPD_section:
9802     case OMPD_single:
9803     case OMPD_master:
9804     case OMPD_critical:
9805     case OMPD_taskyield:
9806     case OMPD_barrier:
9807     case OMPD_taskwait:
9808     case OMPD_taskgroup:
9809     case OMPD_atomic:
9810     case OMPD_flush:
9811     case OMPD_depobj:
9812     case OMPD_teams:
9813     case OMPD_target_data:
9814     case OMPD_target_exit_data:
9815     case OMPD_target_enter_data:
9816     case OMPD_distribute:
9817     case OMPD_distribute_simd:
9818     case OMPD_distribute_parallel_for:
9819     case OMPD_distribute_parallel_for_simd:
9820     case OMPD_teams_distribute:
9821     case OMPD_teams_distribute_simd:
9822     case OMPD_teams_distribute_parallel_for:
9823     case OMPD_teams_distribute_parallel_for_simd:
9824     case OMPD_target_update:
9825     case OMPD_declare_simd:
9826     case OMPD_declare_variant:
9827     case OMPD_declare_target:
9828     case OMPD_end_declare_target:
9829     case OMPD_declare_reduction:
9830     case OMPD_declare_mapper:
9831     case OMPD_taskloop:
9832     case OMPD_taskloop_simd:
9833     case OMPD_master_taskloop:
9834     case OMPD_master_taskloop_simd:
9835     case OMPD_parallel_master_taskloop:
9836     case OMPD_parallel_master_taskloop_simd:
9837     case OMPD_requires:
9838     case OMPD_unknown:
9839       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
9840     }
9841     return;
9842   }
9843 
9844   if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
9845     if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
9846       return;
9847 
9848     scanForTargetRegionsFunctions(
9849         E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
9850     return;
9851   }
9852 
9853   // If this is a lambda function, look into its body.
9854   if (const auto *L = dyn_cast<LambdaExpr>(S))
9855     S = L->getBody();
9856 
9857   // Keep looking for target regions recursively.
9858   for (const Stmt *II : S->children())
9859     scanForTargetRegionsFunctions(II, ParentName);
9860 }
9861 
9862 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9863   // If emitting code for the host, we do not process FD here. Instead we do
9864   // the normal code generation.
9865   if (!CGM.getLangOpts().OpenMPIsDevice) {
9866     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
9867       Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9868           OMPDeclareTargetDeclAttr::getDeviceType(FD);
9869       // Do not emit device_type(nohost) functions for the host.
9870       if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9871         return true;
9872     }
9873     return false;
9874   }
9875 
9876   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9877   // Try to detect target regions in the function.
9878   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
9879     StringRef Name = CGM.getMangledName(GD);
9880     scanForTargetRegionsFunctions(FD->getBody(), Name);
9881     Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9882         OMPDeclareTargetDeclAttr::getDeviceType(FD);
9883     // Do not emit device_type(nohost) functions for the host.
9884     if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9885       return true;
9886   }
9887 
9888   // Do not to emit function if it is not marked as declare target.
9889   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9890          AlreadyEmittedTargetDecls.count(VD) == 0;
9891 }
9892 
9893 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9894   if (!CGM.getLangOpts().OpenMPIsDevice)
9895     return false;
9896 
9897   // Check if there are Ctors/Dtors in this declaration and look for target
9898   // regions in it. We use the complete variant to produce the kernel name
9899   // mangling.
9900   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9901   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9902     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9903       StringRef ParentName =
9904           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9905       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9906     }
9907     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9908       StringRef ParentName =
9909           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9910       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9911     }
9912   }
9913 
9914   // Do not to emit variable if it is not marked as declare target.
9915   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9916       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9917           cast<VarDecl>(GD.getDecl()));
9918   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9919       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9920        HasRequiresUnifiedSharedMemory)) {
9921     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9922     return true;
9923   }
9924   return false;
9925 }
9926 
9927 llvm::Constant *
9928 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
9929                                                 const VarDecl *VD) {
9930   assert(VD->getType().isConstant(CGM.getContext()) &&
9931          "Expected constant variable.");
9932   StringRef VarName;
9933   llvm::Constant *Addr;
9934   llvm::GlobalValue::LinkageTypes Linkage;
9935   QualType Ty = VD->getType();
9936   SmallString<128> Buffer;
9937   {
9938     unsigned DeviceID;
9939     unsigned FileID;
9940     unsigned Line;
9941     getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
9942                              FileID, Line);
9943     llvm::raw_svector_ostream OS(Buffer);
9944     OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
9945        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
9946     VarName = OS.str();
9947   }
9948   Linkage = llvm::GlobalValue::InternalLinkage;
9949   Addr =
9950       getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
9951                                   getDefaultFirstprivateAddressSpace());
9952   cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
9953   CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
9954   CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
9955   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9956       VarName, Addr, VarSize,
9957       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
9958   return Addr;
9959 }
9960 
/// Record the global variable \p VD, whose address is \p Addr. Declare target
/// variables are registered with OffloadEntriesInfoManager; variables without
/// the declare target attribute are only remembered (on the device) in
/// EmittedNonTargetVariables.
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  // Nothing to register when there are no offload targets and this is not a
  // device compilation.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return;
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables.
  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  CharUnits VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
      !HasRequiresUnifiedSharedMemory) {
    // 'to' variable without unified shared memory: the entry describes the
    // variable itself (mangled name, type size, definition linkage).
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
      assert(!VarSize.isZero() && "Expected non-zero size of the variable");
    } else {
      // A declaration without a definition is registered with size zero.
      VarSize = CharUnits::Zero();
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temp solution to prevent optimizations of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      // Create, once per variable, a constant internal "<name>.ref"-style
      // global initialized with the variable's address and add it to the
      // compiler-used globals list.
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        llvm::Constant *AddrRef =
            getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    // 'link' variable, or 'to' variable with unified shared memory: the entry
    // is pointer-sized and uses weak linkage.
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must link or to with unified memory.");
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
    else
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;

    if (CGM.getLangOpts().OpenMPIsDevice) {
      // On the device only the name of the incoming global is registered; the
      // address is dropped.
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      // On the host the entry uses the name and pointer returned by
      // getAddrOfDeclareTargetVar instead of the incoming address.
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    VarSize = CGM.getPointerSize();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}
10031 
10032 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10033   if (isa<FunctionDecl>(GD.getDecl()) ||
10034       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10035     return emitTargetFunctions(GD);
10036 
10037   return emitTargetGlobalVariable(GD);
10038 }
10039 
10040 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10041   for (const VarDecl *VD : DeferredGlobalVariables) {
10042     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10043         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10044     if (!Res)
10045       continue;
10046     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10047         !HasRequiresUnifiedSharedMemory) {
10048       CGM.EmitGlobal(VD);
10049     } else {
10050       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10051               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10052                HasRequiresUnifiedSharedMemory)) &&
10053              "Expected link clause or to clause with unified memory.");
10054       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10055     }
10056   }
10057 }
10058 
/// This implementation performs no adjustment: in assert-enabled builds it
/// only checks that \p D is a target execution directive.
/// NOTE(review): presumably a hook that target-specific runtimes override -
/// confirm against the class declaration.
void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}
10064 
10065 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10066   for (const OMPClause *Clause : D->clauselists()) {
10067     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10068       HasRequiresUnifiedSharedMemory = true;
10069     } else if (const auto *AC =
10070                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10071       switch (AC->getAtomicDefaultMemOrderKind()) {
10072       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10073         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10074         break;
10075       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10076         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10077         break;
10078       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10079         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10080         break;
10081       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10082         break;
10083       }
10084     }
10085   }
10086 }
10087 
/// Returns RequiresAtomicOrdering, the ordering that
/// processRequiresDirective updates when it encounters an
/// atomic_default_mem_order clause.
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
10091 
10092 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10093                                                        LangAS &AS) {
10094   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10095     return false;
10096   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10097   switch(A->getAllocatorType()) {
10098   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10099   // Not supported, fallback to the default mem space.
10100   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10101   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10102   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10103   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10104   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10105   case OMPAllocateDeclAttr::OMPConstMemAlloc:
10106   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10107     AS = LangAS::Default;
10108     return true;
10109   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10110     llvm_unreachable("Expected predefined allocator for the variables with the "
10111                      "static storage.");
10112   }
10113   return false;
10114 }
10115 
/// Returns the HasRequiresUnifiedSharedMemory flag, which
/// processRequiresDirective sets when it encounters a unified_shared_memory
/// clause.
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
10119 
10120 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10121     CodeGenModule &CGM)
10122     : CGM(CGM) {
10123   if (CGM.getLangOpts().OpenMPIsDevice) {
10124     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10125     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10126   }
10127 }
10128 
10129 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10130   if (CGM.getLangOpts().OpenMPIsDevice)
10131     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10132 }
10133 
10134 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10135   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
10136     return true;
10137 
10138   const auto *D = cast<FunctionDecl>(GD.getDecl());
10139   // Do not to emit function if it is marked as declare target as it was already
10140   // emitted.
10141   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10142     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10143       if (auto *F = dyn_cast_or_null<llvm::Function>(
10144               CGM.GetGlobalValue(CGM.getMangledName(GD))))
10145         return !F->isDeclaration();
10146       return false;
10147     }
10148     return true;
10149   }
10150 
10151   return !AlreadyEmittedTargetDecls.insert(D).second;
10152 }
10153 
10154 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
10155   // If we don't have entries or if we are emitting code for the device, we
10156   // don't need to do anything.
10157   if (CGM.getLangOpts().OMPTargetTriples.empty() ||
10158       CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
10159       (OffloadEntriesInfoManager.empty() &&
10160        !HasEmittedDeclareTargetRegion &&
10161        !HasEmittedTargetRegion))
10162     return nullptr;
10163 
10164   // Create and register the function that handles the requires directives.
10165   ASTContext &C = CGM.getContext();
10166 
10167   llvm::Function *RequiresRegFn;
10168   {
10169     CodeGenFunction CGF(CGM);
10170     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
10171     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
10172     std::string ReqName = getName({"omp_offloading", "requires_reg"});
10173     RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI);
10174     CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
10175     OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
10176     // TODO: check for other requires clauses.
10177     // The requires directive takes effect only when a target region is
10178     // present in the compilation unit. Otherwise it is ignored and not
10179     // passed to the runtime. This avoids the runtime from throwing an error
10180     // for mismatching requires clauses across compilation units that don't
10181     // contain at least 1 target region.
10182     assert((HasEmittedTargetRegion ||
10183             HasEmittedDeclareTargetRegion ||
10184             !OffloadEntriesInfoManager.empty()) &&
10185            "Target or declare target region expected.");
10186     if (HasRequiresUnifiedSharedMemory)
10187       Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
10188     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires),
10189         llvm::ConstantInt::get(CGM.Int64Ty, Flags));
10190     CGF.FinishFunction();
10191   }
10192   return RequiresRegFn;
10193 }
10194 
10195 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10196                                     const OMPExecutableDirective &D,
10197                                     SourceLocation Loc,
10198                                     llvm::Function *OutlinedFn,
10199                                     ArrayRef<llvm::Value *> CapturedVars) {
10200   if (!CGF.HaveInsertPoint())
10201     return;
10202 
10203   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10204   CodeGenFunction::RunCleanupsScope Scope(CGF);
10205 
10206   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10207   llvm::Value *Args[] = {
10208       RTLoc,
10209       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10210       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10211   llvm::SmallVector<llvm::Value *, 16> RealArgs;
10212   RealArgs.append(std::begin(Args), std::end(Args));
10213   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10214 
10215   llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
10216   CGF.EmitRuntimeCall(RTLFn, RealArgs);
10217 }
10218 
10219 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10220                                          const Expr *NumTeams,
10221                                          const Expr *ThreadLimit,
10222                                          SourceLocation Loc) {
10223   if (!CGF.HaveInsertPoint())
10224     return;
10225 
10226   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10227 
10228   llvm::Value *NumTeamsVal =
10229       NumTeams
10230           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10231                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10232           : CGF.Builder.getInt32(0);
10233 
10234   llvm::Value *ThreadLimitVal =
10235       ThreadLimit
10236           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10237                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10238           : CGF.Builder.getInt32(0);
10239 
10240   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
10241   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10242                                      ThreadLimitVal};
10243   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
10244                       PushNumTeamsArgs);
10245 }
10246 
/// Emit a data region for directive \p D: the runtime call that opens the
/// data environment (OMPRTL__tgt_target_data_begin), the region body produced
/// by \p CodeGen, and the runtime call that closes the environment
/// (OMPRTL__tgt_target_data_end).
///
/// \param IfCond If non-null, both runtime calls are emitted through
///        emitIfClause and are thus guarded by this condition.
/// \param Device If non-null, the expression evaluated to obtain the device
///        ID; otherwise OMP_DEVICEID_UNDEF is passed to the runtime.
/// \param CodeGen Generator of the region body.
/// \param Info Receives the offloading arrays built for the opening call;
///        the same arrays are reused for the closing call.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MCHandler(D, CGF);
    MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin),
                        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    // Re-create the array arguments from the arrays stored in Info by the
    // opening code.
    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end),
                        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  // Open the data environment, conditionally when an if clause is present.
  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  // Close the data environment, conditionally when an if clause is present.
  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
10373 
/// Emit the runtime call for a standalone data directive \p D, which must be
/// a target enter data, target exit data or target update directive.
///
/// \param IfCond If non-null, the whole sequence is emitted through
///        emitIfClause with an empty 'else' part.
/// \param Device If non-null, the expression evaluated to obtain the device
///        ID; otherwise OMP_DEVICEID_UNDEF is passed to the runtime.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // InputInfo and MapTypesArray are filled in by TargetThenGen below and read
  // by ThenGen, which captures them by reference.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo,
                    &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    llvm::Value *OffloadingArgs[] = {DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray};

    // Select the right runtime function call for each expected standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    OpenMPRTLFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
                        : OMPRTL__tgt_target_data_begin;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
                        : OMPRTL__tgt_target_data_end;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
                        : OMPRTL__tgt_target_data_update;
      break;
    // Every other directive kind is listed explicitly (no 'default' label) and
    // is not expected here.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
  };

  // Builds the offloading arrays, publishes them through InputInfo and
  // MapTypesArray, and then runs ThenGen either as a target task (when the
  // directive has depend clauses) or inlined.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (D.hasClausesOfKind<OMPDependClause>())
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // With an if clause, nothing is emitted when the condition is false.
  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
10533 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  /// NOTE(review): the enumerators presumably mirror the linear (with a
  /// constant or a variable stride) and uniform clauses, with Vector for
  /// all remaining parameters - confirm against the code that fills
  /// ParamAttrTy.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    /// Kind of the parameter; Vector unless set otherwise.
    ParamKindTy Kind = Vector;
    /// NOTE(review): presumably the constant stride, or the position of the
    /// argument holding the stride for LinearWithVarStride - confirm.
    llvm::APSInt StrideOrArg;
    /// NOTE(review): presumably the alignment requested for the parameter -
    /// confirm against the users of this field.
    llvm::APSInt Alignment;
  };
} // namespace
10544 
10545 static unsigned evaluateCDTSize(const FunctionDecl *FD,
10546                                 ArrayRef<ParamAttrTy> ParamAttrs) {
10547   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10548   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10549   // of that clause. The VLEN value must be power of 2.
10550   // In other case the notion of the function`s "characteristic data type" (CDT)
10551   // is used to compute the vector length.
10552   // CDT is defined in the following order:
10553   //   a) For non-void function, the CDT is the return type.
10554   //   b) If the function has any non-uniform, non-linear parameters, then the
10555   //   CDT is the type of the first such parameter.
10556   //   c) If the CDT determined by a) or b) above is struct, union, or class
10557   //   type which is pass-by-value (except for the type that maps to the
10558   //   built-in complex data type), the characteristic data type is int.
10559   //   d) If none of the above three cases is applicable, the CDT is int.
10560   // The VLEN is then determined based on the CDT and the size of vector
10561   // register of that ISA for which current vector version is generated. The
10562   // VLEN is computed using the formula below:
10563   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
10564   // where vector register size specified in section 3.2.1 Registers and the
10565   // Stack Frame of original AMD64 ABI document.
10566   QualType RetType = FD->getReturnType();
10567   if (RetType.isNull())
10568     return 0;
10569   ASTContext &C = FD->getASTContext();
10570   QualType CDT;
10571   if (!RetType.isNull() && !RetType->isVoidType()) {
10572     CDT = RetType;
10573   } else {
10574     unsigned Offset = 0;
10575     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10576       if (ParamAttrs[Offset].Kind == Vector)
10577         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10578       ++Offset;
10579     }
10580     if (CDT.isNull()) {
10581       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10582         if (ParamAttrs[I + Offset].Kind == Vector) {
10583           CDT = FD->getParamDecl(I)->getType();
10584           break;
10585         }
10586       }
10587     }
10588   }
10589   if (CDT.isNull())
10590     CDT = C.IntTy;
10591   CDT = CDT->getCanonicalTypeUnqualified();
10592   if (CDT->isRecordType() || CDT->isUnionType())
10593     CDT = C.IntTy;
10594   return C.getTypeSize(CDT);
10595 }
10596 
10597 static void
10598 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10599                            const llvm::APSInt &VLENVal,
10600                            ArrayRef<ParamAttrTy> ParamAttrs,
10601                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
10602   struct ISADataTy {
10603     char ISA;
10604     unsigned VecRegSize;
10605   };
10606   ISADataTy ISAData[] = {
10607       {
10608           'b', 128
10609       }, // SSE
10610       {
10611           'c', 256
10612       }, // AVX
10613       {
10614           'd', 256
10615       }, // AVX2
10616       {
10617           'e', 512
10618       }, // AVX512
10619   };
10620   llvm::SmallVector<char, 2> Masked;
10621   switch (State) {
10622   case OMPDeclareSimdDeclAttr::BS_Undefined:
10623     Masked.push_back('N');
10624     Masked.push_back('M');
10625     break;
10626   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10627     Masked.push_back('N');
10628     break;
10629   case OMPDeclareSimdDeclAttr::BS_Inbranch:
10630     Masked.push_back('M');
10631     break;
10632   }
10633   for (char Mask : Masked) {
10634     for (const ISADataTy &Data : ISAData) {
10635       SmallString<256> Buffer;
10636       llvm::raw_svector_ostream Out(Buffer);
10637       Out << "_ZGV" << Data.ISA << Mask;
10638       if (!VLENVal) {
10639         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10640         assert(NumElts && "Non-zero simdlen/cdtsize expected");
10641         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10642       } else {
10643         Out << VLENVal;
10644       }
10645       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
10646         switch (ParamAttr.Kind){
10647         case LinearWithVarStride:
10648           Out << 's' << ParamAttr.StrideOrArg;
10649           break;
10650         case Linear:
10651           Out << 'l';
10652           if (!!ParamAttr.StrideOrArg)
10653             Out << ParamAttr.StrideOrArg;
10654           break;
10655         case Uniform:
10656           Out << 'u';
10657           break;
10658         case Vector:
10659           Out << 'v';
10660           break;
10661         }
10662         if (!!ParamAttr.Alignment)
10663           Out << 'a' << ParamAttr.Alignment;
10664       }
10665       Out << '_' << Fn->getName();
10666       Fn->addFnAttr(Out.str());
10667     }
10668   }
10669 }
10670 
// These are the functions needed to mangle the names of the vector
// functions generated by the compiler, according to the rules defined
// in the "Vector Function ABI specifications for AArch64", available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10676 
10677 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
10678 ///
10679 /// TODO: Need to implement the behavior for reference marked with a
10680 /// var or no linear modifiers (1.b in the section). For this, we
10681 /// need to extend ParamKindTy to support the linear modifiers.
10682 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10683   QT = QT.getCanonicalType();
10684 
10685   if (QT->isVoidType())
10686     return false;
10687 
10688   if (Kind == ParamKindTy::Uniform)
10689     return false;
10690 
10691   if (Kind == ParamKindTy::Linear)
10692     return false;
10693 
10694   // TODO: Handle linear references with modifiers
10695 
10696   if (Kind == ParamKindTy::LinearWithVarStride)
10697     return false;
10698 
10699   return true;
10700 }
10701 
10702 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10703 static bool getAArch64PBV(QualType QT, ASTContext &C) {
10704   QT = QT.getCanonicalType();
10705   unsigned Size = C.getTypeSize(QT);
10706 
10707   // Only scalars and complex within 16 bytes wide set PVB to true.
10708   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10709     return false;
10710 
10711   if (QT->isFloatingType())
10712     return true;
10713 
10714   if (QT->isIntegerType())
10715     return true;
10716 
10717   if (QT->isPointerType())
10718     return true;
10719 
10720   // TODO: Add support for complex types (section 3.1.2, item 2).
10721 
10722   return false;
10723 }
10724 
10725 /// Computes the lane size (LS) of a return type or of an input parameter,
10726 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10727 /// TODO: Add support for references, section 3.2.1, item 1.
10728 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10729   if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10730     QualType PTy = QT.getCanonicalType()->getPointeeType();
10731     if (getAArch64PBV(PTy, C))
10732       return C.getTypeSize(PTy);
10733   }
10734   if (getAArch64PBV(QT, C))
10735     return C.getTypeSize(QT);
10736 
10737   return C.getTypeSize(C.getUIntPtrType());
10738 }
10739 
10740 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10741 // signature of the scalar function, as defined in 3.2.2 of the
10742 // AAVFABI.
10743 static std::tuple<unsigned, unsigned, bool>
10744 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10745   QualType RetType = FD->getReturnType().getCanonicalType();
10746 
10747   ASTContext &C = FD->getASTContext();
10748 
10749   bool OutputBecomesInput = false;
10750 
10751   llvm::SmallVector<unsigned, 8> Sizes;
10752   if (!RetType->isVoidType()) {
10753     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10754     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10755       OutputBecomesInput = true;
10756   }
10757   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10758     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10759     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10760   }
10761 
10762   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10763   // The LS of a function parameter / return value can only be a power
10764   // of 2, starting from 8 bits, up to 128.
10765   assert(std::all_of(Sizes.begin(), Sizes.end(),
10766                      [](unsigned Size) {
10767                        return Size == 8 || Size == 16 || Size == 32 ||
10768                               Size == 64 || Size == 128;
10769                      }) &&
10770          "Invalid size");
10771 
10772   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10773                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
10774                          OutputBecomesInput);
10775 }
10776 
10777 /// Mangle the parameter part of the vector function name according to
10778 /// their OpenMP classification. The mangling function is defined in
10779 /// section 3.5 of the AAVFABI.
10780 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10781   SmallString<256> Buffer;
10782   llvm::raw_svector_ostream Out(Buffer);
10783   for (const auto &ParamAttr : ParamAttrs) {
10784     switch (ParamAttr.Kind) {
10785     case LinearWithVarStride:
10786       Out << "ls" << ParamAttr.StrideOrArg;
10787       break;
10788     case Linear:
10789       Out << 'l';
10790       // Don't print the step value if it is not present or if it is
10791       // equal to 1.
10792       if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1)
10793         Out << ParamAttr.StrideOrArg;
10794       break;
10795     case Uniform:
10796       Out << 'u';
10797       break;
10798     case Vector:
10799       Out << 'v';
10800       break;
10801     }
10802 
10803     if (!!ParamAttr.Alignment)
10804       Out << 'a' << ParamAttr.Alignment;
10805   }
10806 
10807   return std::string(Out.str());
10808 }
10809 
10810 // Function used to add the attribute. The parameter `VLEN` is
10811 // templated to allow the use of "x" when targeting scalable functions
10812 // for SVE.
10813 template <typename T>
10814 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10815                                  char ISA, StringRef ParSeq,
10816                                  StringRef MangledName, bool OutputBecomesInput,
10817                                  llvm::Function *Fn) {
10818   SmallString<256> Buffer;
10819   llvm::raw_svector_ostream Out(Buffer);
10820   Out << Prefix << ISA << LMask << VLEN;
10821   if (OutputBecomesInput)
10822     Out << "v";
10823   Out << ParSeq << "_" << MangledName;
10824   Fn->addFnAttr(Out.str());
10825 }
10826 
10827 // Helper function to generate the Advanced SIMD names depending on
10828 // the value of the NDS when simdlen is not present.
10829 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10830                                       StringRef Prefix, char ISA,
10831                                       StringRef ParSeq, StringRef MangledName,
10832                                       bool OutputBecomesInput,
10833                                       llvm::Function *Fn) {
10834   switch (NDS) {
10835   case 8:
10836     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10837                          OutputBecomesInput, Fn);
10838     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10839                          OutputBecomesInput, Fn);
10840     break;
10841   case 16:
10842     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10843                          OutputBecomesInput, Fn);
10844     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10845                          OutputBecomesInput, Fn);
10846     break;
10847   case 32:
10848     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10849                          OutputBecomesInput, Fn);
10850     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10851                          OutputBecomesInput, Fn);
10852     break;
10853   case 64:
10854   case 128:
10855     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10856                          OutputBecomesInput, Fn);
10857     break;
10858   default:
10859     llvm_unreachable("Scalar type is too wide.");
10860   }
10861 }
10862 
10863 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
10864 static void emitAArch64DeclareSimdFunction(
10865     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
10866     ArrayRef<ParamAttrTy> ParamAttrs,
10867     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
10868     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
10869 
10870   // Get basic data for building the vector signature.
10871   const auto Data = getNDSWDS(FD, ParamAttrs);
10872   const unsigned NDS = std::get<0>(Data);
10873   const unsigned WDS = std::get<1>(Data);
10874   const bool OutputBecomesInput = std::get<2>(Data);
10875 
10876   // Check the values provided via `simdlen` by the user.
10877   // 1. A `simdlen(1)` doesn't produce vector signatures,
10878   if (UserVLEN == 1) {
10879     unsigned DiagID = CGM.getDiags().getCustomDiagID(
10880         DiagnosticsEngine::Warning,
10881         "The clause simdlen(1) has no effect when targeting aarch64.");
10882     CGM.getDiags().Report(SLoc, DiagID);
10883     return;
10884   }
10885 
10886   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
10887   // Advanced SIMD output.
10888   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
10889     unsigned DiagID = CGM.getDiags().getCustomDiagID(
10890         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
10891                                     "power of 2 when targeting Advanced SIMD.");
10892     CGM.getDiags().Report(SLoc, DiagID);
10893     return;
10894   }
10895 
10896   // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
10897   // limits.
10898   if (ISA == 's' && UserVLEN != 0) {
10899     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
10900       unsigned DiagID = CGM.getDiags().getCustomDiagID(
10901           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
10902                                       "lanes in the architectural constraints "
10903                                       "for SVE (min is 128-bit, max is "
10904                                       "2048-bit, by steps of 128-bit)");
10905       CGM.getDiags().Report(SLoc, DiagID) << WDS;
10906       return;
10907     }
10908   }
10909 
10910   // Sort out parameter sequence.
10911   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
10912   StringRef Prefix = "_ZGV";
10913   // Generate simdlen from user input (if any).
10914   if (UserVLEN) {
10915     if (ISA == 's') {
10916       // SVE generates only a masked function.
10917       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10918                            OutputBecomesInput, Fn);
10919     } else {
10920       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10921       // Advanced SIMD generates one or two functions, depending on
10922       // the `[not]inbranch` clause.
10923       switch (State) {
10924       case OMPDeclareSimdDeclAttr::BS_Undefined:
10925         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10926                              OutputBecomesInput, Fn);
10927         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10928                              OutputBecomesInput, Fn);
10929         break;
10930       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10931         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10932                              OutputBecomesInput, Fn);
10933         break;
10934       case OMPDeclareSimdDeclAttr::BS_Inbranch:
10935         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10936                              OutputBecomesInput, Fn);
10937         break;
10938       }
10939     }
10940   } else {
10941     // If no user simdlen is provided, follow the AAVFABI rules for
10942     // generating the vector length.
10943     if (ISA == 's') {
10944       // SVE, section 3.4.1, item 1.
10945       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
10946                            OutputBecomesInput, Fn);
10947     } else {
10948       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10949       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
10950       // two vector names depending on the use of the clause
10951       // `[not]inbranch`.
10952       switch (State) {
10953       case OMPDeclareSimdDeclAttr::BS_Undefined:
10954         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10955                                   OutputBecomesInput, Fn);
10956         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10957                                   OutputBecomesInput, Fn);
10958         break;
10959       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10960         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10961                                   OutputBecomesInput, Fn);
10962         break;
10963       case OMPDeclareSimdDeclAttr::BS_Inbranch:
10964         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10965                                   OutputBecomesInput, Fn);
10966         break;
10967       }
10968     }
10969   }
10970 }
10971 
10972 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
10973                                               llvm::Function *Fn) {
10974   ASTContext &C = CGM.getContext();
10975   FD = FD->getMostRecentDecl();
10976   // Map params to their positions in function decl.
10977   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
10978   if (isa<CXXMethodDecl>(FD))
10979     ParamPositions.try_emplace(FD, 0);
10980   unsigned ParamPos = ParamPositions.size();
10981   for (const ParmVarDecl *P : FD->parameters()) {
10982     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
10983     ++ParamPos;
10984   }
10985   while (FD) {
10986     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
10987       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
10988       // Mark uniform parameters.
10989       for (const Expr *E : Attr->uniforms()) {
10990         E = E->IgnoreParenImpCasts();
10991         unsigned Pos;
10992         if (isa<CXXThisExpr>(E)) {
10993           Pos = ParamPositions[FD];
10994         } else {
10995           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10996                                 ->getCanonicalDecl();
10997           Pos = ParamPositions[PVD];
10998         }
10999         ParamAttrs[Pos].Kind = Uniform;
11000       }
11001       // Get alignment info.
11002       auto NI = Attr->alignments_begin();
11003       for (const Expr *E : Attr->aligneds()) {
11004         E = E->IgnoreParenImpCasts();
11005         unsigned Pos;
11006         QualType ParmTy;
11007         if (isa<CXXThisExpr>(E)) {
11008           Pos = ParamPositions[FD];
11009           ParmTy = E->getType();
11010         } else {
11011           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11012                                 ->getCanonicalDecl();
11013           Pos = ParamPositions[PVD];
11014           ParmTy = PVD->getType();
11015         }
11016         ParamAttrs[Pos].Alignment =
11017             (*NI)
11018                 ? (*NI)->EvaluateKnownConstInt(C)
11019                 : llvm::APSInt::getUnsigned(
11020                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11021                           .getQuantity());
11022         ++NI;
11023       }
11024       // Mark linear parameters.
11025       auto SI = Attr->steps_begin();
11026       auto MI = Attr->modifiers_begin();
11027       for (const Expr *E : Attr->linears()) {
11028         E = E->IgnoreParenImpCasts();
11029         unsigned Pos;
11030         if (isa<CXXThisExpr>(E)) {
11031           Pos = ParamPositions[FD];
11032         } else {
11033           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11034                                 ->getCanonicalDecl();
11035           Pos = ParamPositions[PVD];
11036         }
11037         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11038         ParamAttr.Kind = Linear;
11039         if (*SI) {
11040           Expr::EvalResult Result;
11041           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11042             if (const auto *DRE =
11043                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11044               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
11045                 ParamAttr.Kind = LinearWithVarStride;
11046                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
11047                     ParamPositions[StridePVD->getCanonicalDecl()]);
11048               }
11049             }
11050           } else {
11051             ParamAttr.StrideOrArg = Result.Val.getInt();
11052           }
11053         }
11054         ++SI;
11055         ++MI;
11056       }
11057       llvm::APSInt VLENVal;
11058       SourceLocation ExprLoc;
11059       const Expr *VLENExpr = Attr->getSimdlen();
11060       if (VLENExpr) {
11061         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11062         ExprLoc = VLENExpr->getExprLoc();
11063       }
11064       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11065       if (CGM.getTriple().isX86()) {
11066         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11067       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11068         unsigned VLEN = VLENVal.getExtValue();
11069         StringRef MangledName = Fn->getName();
11070         if (CGM.getTarget().hasFeature("sve"))
11071           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11072                                          MangledName, 's', 128, Fn, ExprLoc);
11073         if (CGM.getTarget().hasFeature("neon"))
11074           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11075                                          MangledName, 'n', 128, Fn, ExprLoc);
11076       }
11077     }
11078     FD = FD->getPreviousDecl();
11079   }
11080 }
11081 
11082 namespace {
11083 /// Cleanup action for doacross support.
11084 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11085 public:
11086   static const int DoacrossFinArgs = 2;
11087 
11088 private:
11089   llvm::FunctionCallee RTLFn;
11090   llvm::Value *Args[DoacrossFinArgs];
11091 
11092 public:
11093   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11094                     ArrayRef<llvm::Value *> CallArgs)
11095       : RTLFn(RTLFn) {
11096     assert(CallArgs.size() == DoacrossFinArgs);
11097     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11098   }
11099   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11100     if (!CGF.HaveInsertPoint())
11101       return;
11102     CGF.EmitRuntimeCall(RTLFn, Args);
11103   }
11104 };
11105 } // namespace
11106 
11107 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
11108                                        const OMPLoopDirective &D,
11109                                        ArrayRef<Expr *> NumIterations) {
11110   if (!CGF.HaveInsertPoint())
11111     return;
11112 
11113   ASTContext &C = CGM.getContext();
11114   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
11115   RecordDecl *RD;
11116   if (KmpDimTy.isNull()) {
11117     // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
11118     //  kmp_int64 lo; // lower
11119     //  kmp_int64 up; // upper
11120     //  kmp_int64 st; // stride
11121     // };
11122     RD = C.buildImplicitRecord("kmp_dim");
11123     RD->startDefinition();
11124     addFieldToRecordDecl(C, RD, Int64Ty);
11125     addFieldToRecordDecl(C, RD, Int64Ty);
11126     addFieldToRecordDecl(C, RD, Int64Ty);
11127     RD->completeDefinition();
11128     KmpDimTy = C.getRecordType(RD);
11129   } else {
11130     RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
11131   }
11132   llvm::APInt Size(/*numBits=*/32, NumIterations.size());
11133   QualType ArrayTy =
11134       C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);
11135 
11136   Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
11137   CGF.EmitNullInitialization(DimsAddr, ArrayTy);
11138   enum { LowerFD = 0, UpperFD, StrideFD };
11139   // Fill dims with data.
11140   for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
11141     LValue DimsLVal = CGF.MakeAddrLValue(
11142         CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
11143     // dims.upper = num_iterations;
11144     LValue UpperLVal = CGF.EmitLValueForField(
11145         DimsLVal, *std::next(RD->field_begin(), UpperFD));
11146     llvm::Value *NumIterVal =
11147         CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]),
11148                                  D.getNumIterations()->getType(), Int64Ty,
11149                                  D.getNumIterations()->getExprLoc());
11150     CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
11151     // dims.stride = 1;
11152     LValue StrideLVal = CGF.EmitLValueForField(
11153         DimsLVal, *std::next(RD->field_begin(), StrideFD));
11154     CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
11155                           StrideLVal);
11156   }
11157 
11158   // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
11159   // kmp_int32 num_dims, struct kmp_dim * dims);
11160   llvm::Value *Args[] = {
11161       emitUpdateLocation(CGF, D.getBeginLoc()),
11162       getThreadID(CGF, D.getBeginLoc()),
11163       llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
11164       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11165           CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
11166           CGM.VoidPtrTy)};
11167 
11168   llvm::FunctionCallee RTLFn =
11169       createRuntimeFunction(OMPRTL__kmpc_doacross_init);
11170   CGF.EmitRuntimeCall(RTLFn, Args);
11171   llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
11172       emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
11173   llvm::FunctionCallee FiniRTLFn =
11174       createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
11175   CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11176                                              llvm::makeArrayRef(FiniArgs));
11177 }
11178 
11179 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11180                                           const OMPDependClause *C) {
11181   QualType Int64Ty =
11182       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11183   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11184   QualType ArrayTy = CGM.getContext().getConstantArrayType(
11185       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
11186   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11187   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11188     const Expr *CounterVal = C->getLoopData(I);
11189     assert(CounterVal);
11190     llvm::Value *CntVal = CGF.EmitScalarConversion(
11191         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11192         CounterVal->getExprLoc());
11193     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11194                           /*Volatile=*/false, Int64Ty);
11195   }
11196   llvm::Value *Args[] = {
11197       emitUpdateLocation(CGF, C->getBeginLoc()),
11198       getThreadID(CGF, C->getBeginLoc()),
11199       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
11200   llvm::FunctionCallee RTLFn;
11201   if (C->getDependencyKind() == OMPC_DEPEND_source) {
11202     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
11203   } else {
11204     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
11205     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
11206   }
11207   CGF.EmitRuntimeCall(RTLFn, Args);
11208 }
11209 
11210 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11211                                llvm::FunctionCallee Callee,
11212                                ArrayRef<llvm::Value *> Args) const {
11213   assert(Loc.isValid() && "Outlined function call location must be valid.");
11214   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11215 
11216   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11217     if (Fn->doesNotThrow()) {
11218       CGF.EmitNounwindRuntimeCall(Fn, Args);
11219       return;
11220     }
11221   }
11222   CGF.EmitRuntimeCall(Callee, Args);
11223 }
11224 
11225 void CGOpenMPRuntime::emitOutlinedFunctionCall(
11226     CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
11227     ArrayRef<llvm::Value *> Args) const {
11228   emitCall(CGF, Loc, OutlinedFn, Args);
11229 }
11230 
11231 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11232   if (const auto *FD = dyn_cast<FunctionDecl>(D))
11233     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11234       HasEmittedDeclareTargetRegion = true;
11235 }
11236 
11237 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
11238                                              const VarDecl *NativeParam,
11239                                              const VarDecl *TargetParam) const {
11240   return CGF.GetAddrOfLocalVar(NativeParam);
11241 }
11242 
11243 namespace {
11244 /// Cleanup action for allocate support.
11245 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
11246 public:
11247   static const int CleanupArgs = 3;
11248 
11249 private:
11250   llvm::FunctionCallee RTLFn;
11251   llvm::Value *Args[CleanupArgs];
11252 
11253 public:
11254   OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
11255                        ArrayRef<llvm::Value *> CallArgs)
11256       : RTLFn(RTLFn) {
11257     assert(CallArgs.size() == CleanupArgs &&
11258            "Size of arguments does not match.");
11259     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11260   }
11261   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11262     if (!CGF.HaveInsertPoint())
11263       return;
11264     CGF.EmitRuntimeCall(RTLFn, Args);
11265   }
11266 };
11267 } // namespace
11268 
11269 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
11270                                                    const VarDecl *VD) {
11271   if (!VD)
11272     return Address::invalid();
11273   const VarDecl *CVD = VD->getCanonicalDecl();
11274   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
11275     return Address::invalid();
11276   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
11277   // Use the default allocation.
11278   if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
11279       !AA->getAllocator())
11280     return Address::invalid();
11281   llvm::Value *Size;
11282   CharUnits Align = CGM.getContext().getDeclAlign(CVD);
11283   if (CVD->getType()->isVariablyModifiedType()) {
11284     Size = CGF.getTypeSize(CVD->getType());
11285     // Align the size: ((size + align - 1) / align) * align
11286     Size = CGF.Builder.CreateNUWAdd(
11287         Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
11288     Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
11289     Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
11290   } else {
11291     CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
11292     Size = CGM.getSize(Sz.alignTo(Align));
11293   }
11294   llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
11295   assert(AA->getAllocator() &&
11296          "Expected allocator expression for non-default allocator.");
11297   llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
11298   // According to the standard, the original allocator type is a enum (integer).
11299   // Convert to pointer type, if required.
11300   if (Allocator->getType()->isIntegerTy())
11301     Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
11302   else if (Allocator->getType()->isPointerTy())
11303     Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
11304                                                                 CGM.VoidPtrTy);
11305   llvm::Value *Args[] = {ThreadID, Size, Allocator};
11306 
11307   llvm::Value *Addr =
11308       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args,
11309                           getName({CVD->getName(), ".void.addr"}));
11310   llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
11311                                                               Allocator};
11312   llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);
11313 
11314   CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11315                                                 llvm::makeArrayRef(FiniArgs));
11316   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11317       Addr,
11318       CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
11319       getName({CVD->getName(), ".addr"}));
11320   return Address(Addr, Align);
11321 }
11322 
11323 /// Finds the variant function that matches current context with its context
11324 /// selector.
11325 static const FunctionDecl *getDeclareVariantFunction(CodeGenModule &CGM,
11326                                                      const FunctionDecl *FD) {
11327   if (!FD->hasAttrs() || !FD->hasAttr<OMPDeclareVariantAttr>())
11328     return FD;
11329 
11330   SmallVector<Expr *, 8> VariantExprs;
11331   SmallVector<VariantMatchInfo, 8> VMIs;
11332   for (const auto *A : FD->specific_attrs<OMPDeclareVariantAttr>()) {
11333     const OMPTraitInfo &TI = A->getTraitInfos();
11334     VMIs.push_back(VariantMatchInfo());
11335     TI.getAsVariantMatchInfo(CGM.getContext(), VMIs.back());
11336     VariantExprs.push_back(A->getVariantFuncRef());
11337   }
11338 
11339   OMPContext Ctx(CGM.getLangOpts().OpenMPIsDevice, CGM.getTriple());
11340   // FIXME: Keep the context in the OMPIRBuilder so we can add constructs as we
11341   //        build them.
11342 
11343   int BestMatchIdx = getBestVariantMatchForContext(VMIs, Ctx);
11344   if (BestMatchIdx < 0)
11345     return FD;
11346 
11347   return cast<FunctionDecl>(
11348       cast<DeclRefExpr>(VariantExprs[BestMatchIdx]->IgnoreParenImpCasts())
11349           ->getDecl());
11350 }
11351 
11352 bool CGOpenMPRuntime::emitDeclareVariant(GlobalDecl GD, bool IsForDefinition) {
11353   const auto *D = cast<FunctionDecl>(GD.getDecl());
11354   // If the original function is defined already, use its definition.
11355   StringRef MangledName = CGM.getMangledName(GD);
11356   llvm::GlobalValue *Orig = CGM.GetGlobalValue(MangledName);
11357   if (Orig && !Orig->isDeclaration())
11358     return false;
11359   const FunctionDecl *NewFD = getDeclareVariantFunction(CGM, D);
11360   // Emit original function if it does not have declare variant attribute or the
11361   // context does not match.
11362   if (NewFD == D)
11363     return false;
11364   GlobalDecl NewGD = GD.getWithDecl(NewFD);
11365   if (tryEmitDeclareVariant(NewGD, GD, Orig, IsForDefinition)) {
11366     DeferredVariantFunction.erase(D);
11367     return true;
11368   }
11369   DeferredVariantFunction.insert(std::make_pair(D, std::make_pair(NewGD, GD)));
11370   return true;
11371 }
11372 
11373 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11374     CodeGenModule &CGM, const OMPLoopDirective &S)
11375     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11376   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11377   if (!NeedToPush)
11378     return;
11379   NontemporalDeclsSet &DS =
11380       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11381   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11382     for (const Stmt *Ref : C->private_refs()) {
11383       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11384       const ValueDecl *VD;
11385       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11386         VD = DRE->getDecl();
11387       } else {
11388         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11389         assert((ME->isImplicitCXXThis() ||
11390                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11391                "Expected member of current class.");
11392         VD = ME->getMemberDecl();
11393       }
11394       DS.insert(VD);
11395     }
11396   }
11397 }
11398 
11399 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11400   if (!NeedToPush)
11401     return;
11402   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11403 }
11404 
11405 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11406   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11407 
11408   return llvm::any_of(
11409       CGM.getOpenMPRuntime().NontemporalDeclsStack,
11410       [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
11411 }
11412 
11413 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11414     const OMPExecutableDirective &S,
11415     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11416     const {
11417   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11418   // Vars in target/task regions must be excluded completely.
11419   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11420       isOpenMPTaskingDirective(S.getDirectiveKind())) {
11421     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11422     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11423     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11424     for (const CapturedStmt::Capture &Cap : CS->captures()) {
11425       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11426         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11427     }
11428   }
11429   // Exclude vars in private clauses.
11430   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11431     for (const Expr *Ref : C->varlists()) {
11432       if (!Ref->getType()->isScalarType())
11433         continue;
11434       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11435       if (!DRE)
11436         continue;
11437       NeedToCheckForLPCs.insert(DRE->getDecl());
11438     }
11439   }
11440   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11441     for (const Expr *Ref : C->varlists()) {
11442       if (!Ref->getType()->isScalarType())
11443         continue;
11444       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11445       if (!DRE)
11446         continue;
11447       NeedToCheckForLPCs.insert(DRE->getDecl());
11448     }
11449   }
11450   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11451     for (const Expr *Ref : C->varlists()) {
11452       if (!Ref->getType()->isScalarType())
11453         continue;
11454       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11455       if (!DRE)
11456         continue;
11457       NeedToCheckForLPCs.insert(DRE->getDecl());
11458     }
11459   }
11460   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11461     for (const Expr *Ref : C->varlists()) {
11462       if (!Ref->getType()->isScalarType())
11463         continue;
11464       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11465       if (!DRE)
11466         continue;
11467       NeedToCheckForLPCs.insert(DRE->getDecl());
11468     }
11469   }
11470   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11471     for (const Expr *Ref : C->varlists()) {
11472       if (!Ref->getType()->isScalarType())
11473         continue;
11474       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11475       if (!DRE)
11476         continue;
11477       NeedToCheckForLPCs.insert(DRE->getDecl());
11478     }
11479   }
11480   for (const Decl *VD : NeedToCheckForLPCs) {
11481     for (const LastprivateConditionalData &Data :
11482          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
11483       if (Data.DeclToUniqueName.count(VD) > 0) {
11484         if (!Data.Disabled)
11485           NeedToAddForLPCsAsDisabled.insert(VD);
11486         break;
11487       }
11488     }
11489   }
11490 }
11491 
11492 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11493     CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
11494     : CGM(CGF.CGM),
11495       Action((CGM.getLangOpts().OpenMP >= 50 &&
11496               llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
11497                            [](const OMPLastprivateClause *C) {
11498                              return C->getKind() ==
11499                                     OMPC_LASTPRIVATE_conditional;
11500                            }))
11501                  ? ActionToDo::PushAsLastprivateConditional
11502                  : ActionToDo::DoNotPush) {
11503   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11504   if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
11505     return;
11506   assert(Action == ActionToDo::PushAsLastprivateConditional &&
11507          "Expected a push action.");
11508   LastprivateConditionalData &Data =
11509       CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11510   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11511     if (C->getKind() != OMPC_LASTPRIVATE_conditional)
11512       continue;
11513 
11514     for (const Expr *Ref : C->varlists()) {
11515       Data.DeclToUniqueName.insert(std::make_pair(
11516           cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
11517           SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
11518     }
11519   }
11520   Data.IVLVal = IVLVal;
11521   Data.Fn = CGF.CurFn;
11522 }
11523 
11524 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11525     CodeGenFunction &CGF, const OMPExecutableDirective &S)
11526     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
11527   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11528   if (CGM.getLangOpts().OpenMP < 50)
11529     return;
11530   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
11531   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
11532   if (!NeedToAddForLPCsAsDisabled.empty()) {
11533     Action = ActionToDo::DisableLastprivateConditional;
11534     LastprivateConditionalData &Data =
11535         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11536     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
11537       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
11538     Data.Fn = CGF.CurFn;
11539     Data.Disabled = true;
11540   }
11541 }
11542 
/// Builds the RAII object through the two-argument constructor
/// (CodeGenFunction, directive) and returns it by value.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
11548 
11549 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
11550   if (CGM.getLangOpts().OpenMP < 50)
11551     return;
11552   if (Action == ActionToDo::DisableLastprivateConditional) {
11553     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11554            "Expected list of disabled private vars.");
11555     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11556   }
11557   if (Action == ActionToDo::PushAsLastprivateConditional) {
11558     assert(
11559         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11560         "Expected list of lastprivate conditional vars.");
11561     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11562   }
11563 }
11564 
11565 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
11566                                                         const VarDecl *VD) {
11567   ASTContext &C = CGM.getContext();
11568   auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
11569   if (I == LastprivateConditionalToTypes.end())
11570     I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
11571   QualType NewType;
11572   const FieldDecl *VDField;
11573   const FieldDecl *FiredField;
11574   LValue BaseLVal;
11575   auto VI = I->getSecond().find(VD);
11576   if (VI == I->getSecond().end()) {
11577     RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
11578     RD->startDefinition();
11579     VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
11580     FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
11581     RD->completeDefinition();
11582     NewType = C.getRecordType(RD);
11583     Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
11584     BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
11585     I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
11586   } else {
11587     NewType = std::get<0>(VI->getSecond());
11588     VDField = std::get<1>(VI->getSecond());
11589     FiredField = std::get<2>(VI->getSecond());
11590     BaseLVal = std::get<3>(VI->getSecond());
11591   }
11592   LValue FiredLVal =
11593       CGF.EmitLValueForField(BaseLVal, FiredField);
11594   CGF.EmitStoreOfScalar(
11595       llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
11596       FiredLVal);
11597   return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
11598 }
11599 
11600 namespace {
11601 /// Checks if the lastprivate conditional variable is referenced in LHS.
11602 class LastprivateConditionalRefChecker final
11603     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
11604   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
11605   const Expr *FoundE = nullptr;
11606   const Decl *FoundD = nullptr;
11607   StringRef UniqueDeclName;
11608   LValue IVLVal;
11609   llvm::Function *FoundFn = nullptr;
11610   SourceLocation Loc;
11611 
11612 public:
11613   bool VisitDeclRefExpr(const DeclRefExpr *E) {
11614     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11615          llvm::reverse(LPM)) {
11616       auto It = D.DeclToUniqueName.find(E->getDecl());
11617       if (It == D.DeclToUniqueName.end())
11618         continue;
11619       if (D.Disabled)
11620         return false;
11621       FoundE = E;
11622       FoundD = E->getDecl()->getCanonicalDecl();
11623       UniqueDeclName = It->second;
11624       IVLVal = D.IVLVal;
11625       FoundFn = D.Fn;
11626       break;
11627     }
11628     return FoundE == E;
11629   }
11630   bool VisitMemberExpr(const MemberExpr *E) {
11631     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
11632       return false;
11633     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11634          llvm::reverse(LPM)) {
11635       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
11636       if (It == D.DeclToUniqueName.end())
11637         continue;
11638       if (D.Disabled)
11639         return false;
11640       FoundE = E;
11641       FoundD = E->getMemberDecl()->getCanonicalDecl();
11642       UniqueDeclName = It->second;
11643       IVLVal = D.IVLVal;
11644       FoundFn = D.Fn;
11645       break;
11646     }
11647     return FoundE == E;
11648   }
11649   bool VisitStmt(const Stmt *S) {
11650     for (const Stmt *Child : S->children()) {
11651       if (!Child)
11652         continue;
11653       if (const auto *E = dyn_cast<Expr>(Child))
11654         if (!E->isGLValue())
11655           continue;
11656       if (Visit(Child))
11657         return true;
11658     }
11659     return false;
11660   }
11661   explicit LastprivateConditionalRefChecker(
11662       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
11663       : LPM(LPM) {}
11664   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
11665   getFoundData() const {
11666     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
11667   }
11668 };
11669 } // namespace
11670 
/// Emits the conditional update of the global "last value" copy of a
/// lastprivate conditional variable:
/// \code
///   if (last_iv <= iv) { last_iv = iv; last_a = priv_a; }
/// \endcode
/// \p IVLVal is the loop counter, \p LVal the private copy, and
/// \p UniqueDeclName names both the internal global holding the last value
/// and (with an "iv" component) the global holding the last counter. The
/// update is wrapped in a critical region named \p UniqueDeclName unless the
/// OpenMPSimd language option is set.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  // The counter is loaded here, before the region body below is emitted.
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    // The comparison signedness follows the type of the loop counter.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    // Only scalar and complex values can be copied; aggregates are rejected.
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    // NOTE(review): the result is a discarded temporary and the branch above
    // is already emitted - confirm this statement has the intended effect.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
11757 
11758 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
11759                                                          const Expr *LHS) {
11760   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11761     return;
11762   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
11763   if (!Checker.Visit(LHS))
11764     return;
11765   const Expr *FoundE;
11766   const Decl *FoundD;
11767   StringRef UniqueDeclName;
11768   LValue IVLVal;
11769   llvm::Function *FoundFn;
11770   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
11771       Checker.getFoundData();
11772   if (FoundFn != CGF.CurFn) {
11773     // Special codegen for inner parallel regions.
11774     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
11775     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
11776     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
11777            "Lastprivate conditional is not found in outer region.");
11778     QualType StructTy = std::get<0>(It->getSecond());
11779     const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
11780     LValue PrivLVal = CGF.EmitLValue(FoundE);
11781     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11782         PrivLVal.getAddress(CGF),
11783         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
11784     LValue BaseLVal =
11785         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
11786     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
11787     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
11788                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
11789                         FiredLVal, llvm::AtomicOrdering::Unordered,
11790                         /*IsVolatile=*/true, /*isInit=*/false);
11791     return;
11792   }
11793 
11794   // Private address of the lastprivate conditional in the current context.
11795   // priv_a
11796   LValue LVal = CGF.EmitLValue(FoundE);
11797   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
11798                                    FoundE->getExprLoc());
11799 }
11800 
/// For every lastprivate conditional variable of the innermost enabled stack
/// entry that is captured by the last capture region of \p D and is not in
/// \p IgnoredDecls, emits:
/// \code
///   if (priv_a.Fired != 0) { <conditional update of the global copy> }
/// \endcode
/// Does nothing before OpenMP 5.0, when the stack is empty, when no enabled
/// entry exists, or when the enabled entry belongs to a different function
/// than the one being emitted.
void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  // Innermost entry that is not disabled.
  auto Range = llvm::reverse(LastprivateConditionalStack);
  auto It = llvm::find_if(
      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
  if (It == Range.end() || It->Fn != CGF.CurFn)
    return;
  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
  assert(LPCI != LastprivateConditionalToTypes.end() &&
         "Lastprivates must be registered already.");
  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
  for (const auto &Pair : It->DeclToUniqueName) {
    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
    // Skip variables that are not captured by the region or that the caller
    // asked to ignore.
    if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
      continue;
    auto I = LPCI->getSecond().find(Pair.first);
    assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be rehistered already.");
    // bool Cmp = priv_a.Fired != 0;
    LValue BaseLVal = std::get<3>(I->getSecond());
    LValue FiredLVal =
        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
    // if (Cmp) {
    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
    CGF.EmitBlock(ThenBB);
    Address Addr = CGF.GetAddrOfLocalVar(VD);
    // For a variable of reference type the update reads through the
    // reference; otherwise the local address is used directly.
    LValue LVal;
    if (VD->getType()->isReferenceType())
      LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
                                           AlignmentSource::Decl);
    else
      LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
                                AlignmentSource::Decl);
    emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
                                     D.getBeginLoc());
    auto AL = ApplyDebugLocation::CreateArtificial(CGF);
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
    // }
  }
}
11850 
11851 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
11852     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
11853     SourceLocation Loc) {
11854   if (CGF.getLangOpts().OpenMP < 50)
11855     return;
11856   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
11857   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
11858          "Unknown lastprivate conditional variable.");
11859   StringRef UniqueName = It->second;
11860   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
11861   // The variable was not updated in the region - exit.
11862   if (!GV)
11863     return;
11864   LValue LPLVal = CGF.MakeAddrLValue(
11865       GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
11866   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
11867   CGF.EmitStoreOfScalar(Res, PrivLVal);
11868 }
11869 
/// SIMD-only runtime: emitParallelOutlinedFunction is not supported. Reaching
/// this entry point is a bug and is trapped with llvm_unreachable.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11875 
/// SIMD-only runtime: emitTeamsOutlinedFunction is not supported. Reaching
/// this entry point is a bug and is trapped with llvm_unreachable.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11881 
/// SIMD-only runtime: emitTaskOutlinedFunction is not supported. Reaching
/// this entry point is a bug and is trapped with llvm_unreachable.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11889 
/// SIMD-only runtime: emitParallelCall is not supported. Reaching this entry
/// point is a bug and is trapped with llvm_unreachable.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11897 
/// SIMD-only runtime: emitCriticalRegion is not supported. Reaching this
/// entry point is a bug and is trapped with llvm_unreachable.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11904 
/// SIMD-only runtime: emitMasterRegion is not supported. Reaching this entry
/// point is a bug and is trapped with llvm_unreachable.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11910 
/// SIMD-only runtime: emitTaskyieldCall is not supported. Reaching this entry
/// point is a bug and is trapped with llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11915 
/// SIMD-only runtime: emitTaskgroupRegion is not supported. Reaching this
/// entry point is a bug and is trapped with llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11921 
/// SIMD-only runtime: emitSingleRegion is not supported. Reaching this entry
/// point is a bug and is trapped with llvm_unreachable.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11929 
/// SIMD-only runtime: emitOrderedRegion is not supported. Reaching this entry
/// point is a bug and is trapped with llvm_unreachable.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11936 
/// SIMD-only runtime: emitBarrierCall is not supported. Reaching this entry
/// point is a bug and is trapped with llvm_unreachable.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11944 
/// SIMD-only runtime: emitForDispatchInit is not supported. Reaching this
/// entry point is a bug and is trapped with llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11951 
/// SIMD-only runtime: emitForStaticInit is not supported. Reaching this entry
/// point is a bug and is trapped with llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11957 
/// SIMD-only runtime: emitDistributeStaticInit is not supported. Reaching
/// this entry point is a bug and is trapped with llvm_unreachable.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11963 
/// SIMD-only runtime: emitForOrderedIterationEnd is not supported. Reaching
/// this entry point is a bug and is trapped with llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11970 
/// SIMD-only runtime: emitForStaticFinish is not supported. Reaching this
/// entry point is a bug and is trapped with llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11976 
/// SIMD-only runtime: emitForNext is not supported. Reaching this entry point
/// is a bug and is trapped with llvm_unreachable.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11984 
/// SIMD-only runtime: emitNumThreadsClause is not supported. Reaching this
/// entry point is a bug and is trapped with llvm_unreachable.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11990 
/// SIMD-only runtime: emitProcBindClause is not supported. Reaching this
/// entry point is a bug and is trapped with llvm_unreachable.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11996 
/// SIMD-only runtime: getAddrOfThreadPrivate is not supported. Reaching this
/// entry point is a bug and is trapped with llvm_unreachable.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12003 
12004 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
12005     const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
12006     CodeGenFunction *CGF) {
12007   llvm_unreachable("Not supported in SIMD-only mode");
12008 }
12009 
12010 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
12011     CodeGenFunction &CGF, QualType VarType, StringRef Name) {
12012   llvm_unreachable("Not supported in SIMD-only mode");
12013 }
12014 
12015 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
12016                                     ArrayRef<const Expr *> Vars,
12017                                     SourceLocation Loc,
12018                                     llvm::AtomicOrdering AO) {
12019   llvm_unreachable("Not supported in SIMD-only mode");
12020 }
12021 
12022 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
12023                                        const OMPExecutableDirective &D,
12024                                        llvm::Function *TaskFunction,
12025                                        QualType SharedsTy, Address Shareds,
12026                                        const Expr *IfCond,
12027                                        const OMPTaskDataTy &Data) {
12028   llvm_unreachable("Not supported in SIMD-only mode");
12029 }
12030 
12031 void CGOpenMPSIMDRuntime::emitTaskLoopCall(
12032     CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
12033     llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
12034     const Expr *IfCond, const OMPTaskDataTy &Data) {
12035   llvm_unreachable("Not supported in SIMD-only mode");
12036 }
12037 
12038 void CGOpenMPSIMDRuntime::emitReduction(
12039     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
12040     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
12041     ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
12042   assert(Options.SimpleReduction && "Only simple reduction is expected.");
12043   CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
12044                                  ReductionOps, Options);
12045 }
12046 
12047 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
12048     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
12049     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
12050   llvm_unreachable("Not supported in SIMD-only mode");
12051 }
12052 
12053 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
12054                                                   SourceLocation Loc,
12055                                                   ReductionCodeGen &RCG,
12056                                                   unsigned N) {
12057   llvm_unreachable("Not supported in SIMD-only mode");
12058 }
12059 
12060 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
12061                                                   SourceLocation Loc,
12062                                                   llvm::Value *ReductionsPtr,
12063                                                   LValue SharedLVal) {
12064   llvm_unreachable("Not supported in SIMD-only mode");
12065 }
12066 
12067 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
12068                                            SourceLocation Loc) {
12069   llvm_unreachable("Not supported in SIMD-only mode");
12070 }
12071 
12072 void CGOpenMPSIMDRuntime::emitCancellationPointCall(
12073     CodeGenFunction &CGF, SourceLocation Loc,
12074     OpenMPDirectiveKind CancelRegion) {
12075   llvm_unreachable("Not supported in SIMD-only mode");
12076 }
12077 
12078 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
12079                                          SourceLocation Loc, const Expr *IfCond,
12080                                          OpenMPDirectiveKind CancelRegion) {
12081   llvm_unreachable("Not supported in SIMD-only mode");
12082 }
12083 
12084 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
12085     const OMPExecutableDirective &D, StringRef ParentName,
12086     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
12087     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
12088   llvm_unreachable("Not supported in SIMD-only mode");
12089 }
12090 
12091 void CGOpenMPSIMDRuntime::emitTargetCall(
12092     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12093     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
12094     const Expr *Device,
12095     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
12096                                      const OMPLoopDirective &D)>
12097         SizeEmitter) {
12098   llvm_unreachable("Not supported in SIMD-only mode");
12099 }
12100 
12101 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
12102   llvm_unreachable("Not supported in SIMD-only mode");
12103 }
12104 
12105 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
12106   llvm_unreachable("Not supported in SIMD-only mode");
12107 }
12108 
12109 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
12110   return false;
12111 }
12112 
12113 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
12114                                         const OMPExecutableDirective &D,
12115                                         SourceLocation Loc,
12116                                         llvm::Function *OutlinedFn,
12117                                         ArrayRef<llvm::Value *> CapturedVars) {
12118   llvm_unreachable("Not supported in SIMD-only mode");
12119 }
12120 
12121 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
12122                                              const Expr *NumTeams,
12123                                              const Expr *ThreadLimit,
12124                                              SourceLocation Loc) {
12125   llvm_unreachable("Not supported in SIMD-only mode");
12126 }
12127 
12128 void CGOpenMPSIMDRuntime::emitTargetDataCalls(
12129     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12130     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
12131   llvm_unreachable("Not supported in SIMD-only mode");
12132 }
12133 
12134 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
12135     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12136     const Expr *Device) {
12137   llvm_unreachable("Not supported in SIMD-only mode");
12138 }
12139 
12140 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
12141                                            const OMPLoopDirective &D,
12142                                            ArrayRef<Expr *> NumIterations) {
12143   llvm_unreachable("Not supported in SIMD-only mode");
12144 }
12145 
12146 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12147                                               const OMPDependClause *C) {
12148   llvm_unreachable("Not supported in SIMD-only mode");
12149 }
12150 
12151 const VarDecl *
12152 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
12153                                         const VarDecl *NativeParam) const {
12154   llvm_unreachable("Not supported in SIMD-only mode");
12155 }
12156 
12157 Address
12158 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
12159                                          const VarDecl *NativeParam,
12160                                          const VarDecl *TargetParam) const {
12161   llvm_unreachable("Not supported in SIMD-only mode");
12162 }
12163