1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/Attr.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/OpenMPClause.h"
21 #include "clang/AST/StmtOpenMP.h"
22 #include "clang/AST/StmtVisitor.h"
23 #include "clang/Basic/BitmaskEnum.h"
24 #include "clang/Basic/FileManager.h"
25 #include "clang/Basic/OpenMPKinds.h"
26 #include "clang/Basic/SourceManager.h"
27 #include "clang/CodeGen/ConstantInitBuilder.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/SetOperations.h"
30 #include "llvm/ADT/StringExtras.h"
31 #include "llvm/Bitcode/BitcodeReader.h"
32 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
33 #include "llvm/IR/Constants.h"
34 #include "llvm/IR/DerivedTypes.h"
35 #include "llvm/IR/GlobalValue.h"
36 #include "llvm/IR/Value.h"
37 #include "llvm/Support/AtomicOrdering.h"
38 #include "llvm/Support/Format.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include <cassert>
41 
42 using namespace clang;
43 using namespace CodeGen;
44 using namespace llvm::omp;
45 
46 namespace {
47 /// Base class for handling code generation inside OpenMP regions.
48 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
49 public:
50   /// Kinds of OpenMP regions used in codegen.
51   enum CGOpenMPRegionKind {
52     /// Region with outlined function for standalone 'parallel'
53     /// directive.
54     ParallelOutlinedRegion,
55     /// Region with outlined function for standalone 'task' directive.
56     TaskOutlinedRegion,
57     /// Region for constructs that do not require function outlining,
58     /// like 'for', 'sections', 'atomic' etc. directives.
59     InlinedRegion,
60     /// Region with outlined function for standalone 'target' directive.
61     TargetRegion,
62   };
63 
64   CGOpenMPRegionInfo(const CapturedStmt &CS,
65                      const CGOpenMPRegionKind RegionKind,
66                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
67                      bool HasCancel)
68       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
69         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
70 
71   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
72                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
73                      bool HasCancel)
74       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
75         Kind(Kind), HasCancel(HasCancel) {}
76 
77   /// Get a variable or parameter for storing global thread id
78   /// inside OpenMP construct.
79   virtual const VarDecl *getThreadIDVariable() const = 0;
80 
81   /// Emit the captured statement body.
82   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
83 
84   /// Get an LValue for the current ThreadID variable.
85   /// \return LValue for thread id variable. This LValue always has type int32*.
86   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
87 
88   virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
89 
90   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
91 
92   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
93 
94   bool hasCancel() const { return HasCancel; }
95 
96   static bool classof(const CGCapturedStmtInfo *Info) {
97     return Info->getKind() == CR_OpenMP;
98   }
99 
100   ~CGOpenMPRegionInfo() override = default;
101 
102 protected:
103   CGOpenMPRegionKind RegionKind;
104   RegionCodeGenTy CodeGen;
105   OpenMPDirectiveKind Kind;
106   bool HasCancel;
107 };
108 
109 /// API for captured statement code generation in OpenMP constructs.
110 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
111 public:
112   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
113                              const RegionCodeGenTy &CodeGen,
114                              OpenMPDirectiveKind Kind, bool HasCancel,
115                              StringRef HelperName)
116       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
117                            HasCancel),
118         ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
119     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
120   }
121 
122   /// Get a variable or parameter for storing global thread id
123   /// inside OpenMP construct.
124   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
125 
126   /// Get the name of the capture helper.
127   StringRef getHelperName() const override { return HelperName; }
128 
129   static bool classof(const CGCapturedStmtInfo *Info) {
130     return CGOpenMPRegionInfo::classof(Info) &&
131            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
132                ParallelOutlinedRegion;
133   }
134 
135 private:
136   /// A variable or parameter storing global thread id for OpenMP
137   /// constructs.
138   const VarDecl *ThreadIDVar;
139   StringRef HelperName;
140 };
141 
142 /// API for captured statement code generation in OpenMP constructs.
143 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
144 public:
145   class UntiedTaskActionTy final : public PrePostActionTy {
146     bool Untied;
147     const VarDecl *PartIDVar;
148     const RegionCodeGenTy UntiedCodeGen;
149     llvm::SwitchInst *UntiedSwitch = nullptr;
150 
151   public:
152     UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
153                        const RegionCodeGenTy &UntiedCodeGen)
154         : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
155     void Enter(CodeGenFunction &CGF) override {
156       if (Untied) {
157         // Emit task switching point.
158         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
159             CGF.GetAddrOfLocalVar(PartIDVar),
160             PartIDVar->getType()->castAs<PointerType>());
161         llvm::Value *Res =
162             CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
163         llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
164         UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
165         CGF.EmitBlock(DoneBB);
166         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
167         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
168         UntiedSwitch->addCase(CGF.Builder.getInt32(0),
169                               CGF.Builder.GetInsertBlock());
170         emitUntiedSwitch(CGF);
171       }
172     }
173     void emitUntiedSwitch(CodeGenFunction &CGF) const {
174       if (Untied) {
175         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
176             CGF.GetAddrOfLocalVar(PartIDVar),
177             PartIDVar->getType()->castAs<PointerType>());
178         CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
179                               PartIdLVal);
180         UntiedCodeGen(CGF);
181         CodeGenFunction::JumpDest CurPoint =
182             CGF.getJumpDestInCurrentScope(".untied.next.");
183         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
184         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
185         UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
186                               CGF.Builder.GetInsertBlock());
187         CGF.EmitBranchThroughCleanup(CurPoint);
188         CGF.EmitBlock(CurPoint.getBlock());
189       }
190     }
191     unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
192   };
193   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
194                                  const VarDecl *ThreadIDVar,
195                                  const RegionCodeGenTy &CodeGen,
196                                  OpenMPDirectiveKind Kind, bool HasCancel,
197                                  const UntiedTaskActionTy &Action)
198       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
199         ThreadIDVar(ThreadIDVar), Action(Action) {
200     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
201   }
202 
203   /// Get a variable or parameter for storing global thread id
204   /// inside OpenMP construct.
205   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
206 
207   /// Get an LValue for the current ThreadID variable.
208   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
209 
210   /// Get the name of the capture helper.
211   StringRef getHelperName() const override { return ".omp_outlined."; }
212 
213   void emitUntiedSwitch(CodeGenFunction &CGF) override {
214     Action.emitUntiedSwitch(CGF);
215   }
216 
217   static bool classof(const CGCapturedStmtInfo *Info) {
218     return CGOpenMPRegionInfo::classof(Info) &&
219            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
220                TaskOutlinedRegion;
221   }
222 
223 private:
224   /// A variable or parameter storing global thread id for OpenMP
225   /// constructs.
226   const VarDecl *ThreadIDVar;
227   /// Action for emitting code for untied tasks.
228   const UntiedTaskActionTy &Action;
229 };
230 
231 /// API for inlined captured statement code generation in OpenMP
232 /// constructs.
233 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
234 public:
235   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
236                             const RegionCodeGenTy &CodeGen,
237                             OpenMPDirectiveKind Kind, bool HasCancel)
238       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
239         OldCSI(OldCSI),
240         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
241 
242   // Retrieve the value of the context parameter.
243   llvm::Value *getContextValue() const override {
244     if (OuterRegionInfo)
245       return OuterRegionInfo->getContextValue();
246     llvm_unreachable("No context value for inlined OpenMP region");
247   }
248 
249   void setContextValue(llvm::Value *V) override {
250     if (OuterRegionInfo) {
251       OuterRegionInfo->setContextValue(V);
252       return;
253     }
254     llvm_unreachable("No context value for inlined OpenMP region");
255   }
256 
257   /// Lookup the captured field decl for a variable.
258   const FieldDecl *lookup(const VarDecl *VD) const override {
259     if (OuterRegionInfo)
260       return OuterRegionInfo->lookup(VD);
261     // If there is no outer outlined region,no need to lookup in a list of
262     // captured variables, we can use the original one.
263     return nullptr;
264   }
265 
266   FieldDecl *getThisFieldDecl() const override {
267     if (OuterRegionInfo)
268       return OuterRegionInfo->getThisFieldDecl();
269     return nullptr;
270   }
271 
272   /// Get a variable or parameter for storing global thread id
273   /// inside OpenMP construct.
274   const VarDecl *getThreadIDVariable() const override {
275     if (OuterRegionInfo)
276       return OuterRegionInfo->getThreadIDVariable();
277     return nullptr;
278   }
279 
280   /// Get an LValue for the current ThreadID variable.
281   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
282     if (OuterRegionInfo)
283       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
284     llvm_unreachable("No LValue for inlined OpenMP construct");
285   }
286 
287   /// Get the name of the capture helper.
288   StringRef getHelperName() const override {
289     if (auto *OuterRegionInfo = getOldCSI())
290       return OuterRegionInfo->getHelperName();
291     llvm_unreachable("No helper name for inlined OpenMP construct");
292   }
293 
294   void emitUntiedSwitch(CodeGenFunction &CGF) override {
295     if (OuterRegionInfo)
296       OuterRegionInfo->emitUntiedSwitch(CGF);
297   }
298 
299   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
300 
301   static bool classof(const CGCapturedStmtInfo *Info) {
302     return CGOpenMPRegionInfo::classof(Info) &&
303            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
304   }
305 
306   ~CGOpenMPInlinedRegionInfo() override = default;
307 
308 private:
309   /// CodeGen info about outer OpenMP region.
310   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
311   CGOpenMPRegionInfo *OuterRegionInfo;
312 };
313 
314 /// API for captured statement code generation in OpenMP target
315 /// constructs. For this captures, implicit parameters are used instead of the
316 /// captured fields. The name of the target region has to be unique in a given
317 /// application so it is provided by the client, because only the client has
318 /// the information to generate that.
319 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
320 public:
321   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
322                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
323       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
324                            /*HasCancel=*/false),
325         HelperName(HelperName) {}
326 
327   /// This is unused for target regions because each starts executing
328   /// with a single thread.
329   const VarDecl *getThreadIDVariable() const override { return nullptr; }
330 
331   /// Get the name of the capture helper.
332   StringRef getHelperName() const override { return HelperName; }
333 
334   static bool classof(const CGCapturedStmtInfo *Info) {
335     return CGOpenMPRegionInfo::classof(Info) &&
336            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
337   }
338 
339 private:
340   StringRef HelperName;
341 };
342 
343 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
344   llvm_unreachable("No codegen for expressions");
345 }
346 /// API for generation of expressions captured in a innermost OpenMP
347 /// region.
348 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
349 public:
350   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
351       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
352                                   OMPD_unknown,
353                                   /*HasCancel=*/false),
354         PrivScope(CGF) {
355     // Make sure the globals captured in the provided statement are local by
356     // using the privatization logic. We assume the same variable is not
357     // captured more than once.
358     for (const auto &C : CS.captures()) {
359       if (!C.capturesVariable() && !C.capturesVariableByCopy())
360         continue;
361 
362       const VarDecl *VD = C.getCapturedVar();
363       if (VD->isLocalVarDeclOrParm())
364         continue;
365 
366       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
367                       /*RefersToEnclosingVariableOrCapture=*/false,
368                       VD->getType().getNonReferenceType(), VK_LValue,
369                       C.getLocation());
370       PrivScope.addPrivate(
371           VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
372     }
373     (void)PrivScope.Privatize();
374   }
375 
376   /// Lookup the captured field decl for a variable.
377   const FieldDecl *lookup(const VarDecl *VD) const override {
378     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
379       return FD;
380     return nullptr;
381   }
382 
383   /// Emit the captured statement body.
384   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
385     llvm_unreachable("No body for expressions");
386   }
387 
388   /// Get a variable or parameter for storing global thread id
389   /// inside OpenMP construct.
390   const VarDecl *getThreadIDVariable() const override {
391     llvm_unreachable("No thread id for expressions");
392   }
393 
394   /// Get the name of the capture helper.
395   StringRef getHelperName() const override {
396     llvm_unreachable("No helper name for expressions");
397   }
398 
399   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
400 
401 private:
402   /// Private scope to capture global variables.
403   CodeGenFunction::OMPPrivateScope PrivScope;
404 };
405 
406 /// RAII for emitting code of OpenMP constructs.
407 class InlinedOpenMPRegionRAII {
408   CodeGenFunction &CGF;
409   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
410   FieldDecl *LambdaThisCaptureField = nullptr;
411   const CodeGen::CGBlockInfo *BlockInfo = nullptr;
412 
413 public:
414   /// Constructs region for combined constructs.
415   /// \param CodeGen Code generation sequence for combined directives. Includes
416   /// a list of functions used for code generation of implicitly inlined
417   /// regions.
418   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
419                           OpenMPDirectiveKind Kind, bool HasCancel)
420       : CGF(CGF) {
421     // Start emission for the construct.
422     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
423         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
424     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
425     LambdaThisCaptureField = CGF.LambdaThisCaptureField;
426     CGF.LambdaThisCaptureField = nullptr;
427     BlockInfo = CGF.BlockInfo;
428     CGF.BlockInfo = nullptr;
429   }
430 
431   ~InlinedOpenMPRegionRAII() {
432     // Restore original CapturedStmtInfo only if we're done with code emission.
433     auto *OldCSI =
434         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
435     delete CGF.CapturedStmtInfo;
436     CGF.CapturedStmtInfo = OldCSI;
437     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
438     CGF.LambdaThisCaptureField = LambdaThisCaptureField;
439     CGF.BlockInfo = BlockInfo;
440   }
441 };
442 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// The enum is marked as an LLVM bitmask enum whose largest value is
/// OMP_IDENT_WORK_DISTRIBUTE.
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive. Same value as
  /// OMP_IDENT_BARRIER_IMPL.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive (0x40 | 0x80).
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive (0x40 | 0x100).
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
471 
// Nested anonymous namespace that enables the LLVM bitmask-enum operators and
// holds the offloading-related flag and device-id enums.
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
/// Marked as an LLVM bitmask enum whose largest value is
/// OMP_REQ_DYNAMIC_ALLOCATORS.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// Flag undefined (no bit set).
  OMP_REQ_UNDEFINED               = 0x000,
  /// No requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// The reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// The unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// The unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// The dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

/// Device ids with a reserved meaning.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
497 
/// Indices of the fields of the ident structure that describes a source
/// location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
///
/// The enumerators are numbered in the declaration order of the members
/// above, starting at zero.
enum IdentFieldIndex {
  /// reserved_1: might be used in Fortran.
  IdentField_Reserved_1 = 0,
  /// flags: OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags = 1,
  /// reserved_2: not really used in Fortran any more.
  IdentField_Reserved_2 = 2,
  /// reserved_3: source[4] in Fortran, do not use for C++.
  IdentField_Reserved_3 = 3,
  /// psource: string describing the source location. The string is composed
  /// of semi-colon separated fields which describe the source file, the
  /// function and a pair of line numbers that delimit the construct.
  IdentField_PSource = 4
};
538 
/// Schedule types for 'omp for' loops. The enumerators and their numeric
/// values are taken from the enum sched_type in kmp.h.
enum OpenMPSchedType {
  // Default (unordered) versions.
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// Default schedule; an alias of OMP_sch_static.
  OMP_sch_default = OMP_sch_static,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,

  // 'ordered' versions.
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,

  // dist_schedule types.
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,

  // Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
570 
571 enum OpenMPRTLFunction {
572   /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
573   /// kmpc_micro microtask, ...);
574   OMPRTL__kmpc_fork_call,
575   /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
576   /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
577   OMPRTL__kmpc_threadprivate_cached,
578   /// Call to void __kmpc_threadprivate_register( ident_t *,
579   /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
580   OMPRTL__kmpc_threadprivate_register,
  // Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
582   OMPRTL__kmpc_global_thread_num,
583   // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
584   // kmp_critical_name *crit);
585   OMPRTL__kmpc_critical,
586   // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
587   // global_tid, kmp_critical_name *crit, uintptr_t hint);
588   OMPRTL__kmpc_critical_with_hint,
589   // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
590   // kmp_critical_name *crit);
591   OMPRTL__kmpc_end_critical,
592   // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
593   // global_tid);
594   OMPRTL__kmpc_cancel_barrier,
595   // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
596   OMPRTL__kmpc_barrier,
597   // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
598   OMPRTL__kmpc_for_static_fini,
599   // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
600   // global_tid);
601   OMPRTL__kmpc_serialized_parallel,
602   // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
603   // global_tid);
604   OMPRTL__kmpc_end_serialized_parallel,
605   // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
606   // kmp_int32 num_threads);
607   OMPRTL__kmpc_push_num_threads,
608   // Call to void __kmpc_flush(ident_t *loc);
609   OMPRTL__kmpc_flush,
610   // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
611   OMPRTL__kmpc_master,
612   // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
613   OMPRTL__kmpc_end_master,
614   // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
615   // int end_part);
616   OMPRTL__kmpc_omp_taskyield,
617   // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
618   OMPRTL__kmpc_single,
619   // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
620   OMPRTL__kmpc_end_single,
621   // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
622   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
623   // kmp_routine_entry_t *task_entry);
624   OMPRTL__kmpc_omp_task_alloc,
625   // Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *,
626   // kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
627   // size_t sizeof_shareds, kmp_routine_entry_t *task_entry,
628   // kmp_int64 device_id);
629   OMPRTL__kmpc_omp_target_task_alloc,
630   // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
631   // new_task);
632   OMPRTL__kmpc_omp_task,
633   // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
634   // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
635   // kmp_int32 didit);
636   OMPRTL__kmpc_copyprivate,
637   // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
638   // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
639   // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
640   OMPRTL__kmpc_reduce,
641   // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
642   // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
643   // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
644   // *lck);
645   OMPRTL__kmpc_reduce_nowait,
646   // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
647   // kmp_critical_name *lck);
648   OMPRTL__kmpc_end_reduce,
649   // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
650   // kmp_critical_name *lck);
651   OMPRTL__kmpc_end_reduce_nowait,
652   // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
653   // kmp_task_t * new_task);
654   OMPRTL__kmpc_omp_task_begin_if0,
655   // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
656   // kmp_task_t * new_task);
657   OMPRTL__kmpc_omp_task_complete_if0,
658   // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
659   OMPRTL__kmpc_ordered,
660   // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
661   OMPRTL__kmpc_end_ordered,
662   // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
663   // global_tid);
664   OMPRTL__kmpc_omp_taskwait,
665   // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
666   OMPRTL__kmpc_taskgroup,
667   // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
668   OMPRTL__kmpc_end_taskgroup,
669   // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
670   // int proc_bind);
671   OMPRTL__kmpc_push_proc_bind,
672   // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
673   // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
674   // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
675   OMPRTL__kmpc_omp_task_with_deps,
676   // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
677   // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
678   // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
679   OMPRTL__kmpc_omp_wait_deps,
680   // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
681   // global_tid, kmp_int32 cncl_kind);
682   OMPRTL__kmpc_cancellationpoint,
683   // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
684   // kmp_int32 cncl_kind);
685   OMPRTL__kmpc_cancel,
686   // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
687   // kmp_int32 num_teams, kmp_int32 thread_limit);
688   OMPRTL__kmpc_push_num_teams,
689   // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
690   // microtask, ...);
691   OMPRTL__kmpc_fork_teams,
692   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
693   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
694   // sched, kmp_uint64 grainsize, void *task_dup);
695   OMPRTL__kmpc_taskloop,
696   // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
697   // num_dims, struct kmp_dim *dims);
698   OMPRTL__kmpc_doacross_init,
699   // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
700   OMPRTL__kmpc_doacross_fini,
701   // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
702   // *vec);
703   OMPRTL__kmpc_doacross_post,
704   // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
705   // *vec);
706   OMPRTL__kmpc_doacross_wait,
707   // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
708   // *data);
709   OMPRTL__kmpc_task_reduction_init,
710   // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
711   // *d);
712   OMPRTL__kmpc_task_reduction_get_th_data,
713   // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
714   OMPRTL__kmpc_alloc,
715   // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
716   OMPRTL__kmpc_free,
717 
718   //
719   // Offloading related calls
720   //
721   // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
722   // size);
723   OMPRTL__kmpc_push_target_tripcount,
724   // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
725   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
726   // *arg_types);
727   OMPRTL__tgt_target,
728   // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
729   // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
730   // *arg_types);
731   OMPRTL__tgt_target_nowait,
732   // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
733   // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
734   // *arg_types, int32_t num_teams, int32_t thread_limit);
735   OMPRTL__tgt_target_teams,
736   // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
737   // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
738   // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
739   OMPRTL__tgt_target_teams_nowait,
740   // Call to void __tgt_register_requires(int64_t flags);
741   OMPRTL__tgt_register_requires,
742   // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
743   // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
744   OMPRTL__tgt_target_data_begin,
745   // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
746   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
747   // *arg_types);
748   OMPRTL__tgt_target_data_begin_nowait,
  // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
751   OMPRTL__tgt_target_data_end,
752   // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
753   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
754   // *arg_types);
755   OMPRTL__tgt_target_data_end_nowait,
756   // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
757   // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
758   OMPRTL__tgt_target_data_update,
759   // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
760   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
761   // *arg_types);
762   OMPRTL__tgt_target_data_update_nowait,
763   // Call to int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
764   OMPRTL__tgt_mapper_num_components,
765   // Call to void __tgt_push_mapper_component(void *rt_mapper_handle, void
766   // *base, void *begin, int64_t size, int64_t type);
767   OMPRTL__tgt_push_mapper_component,
768   // Call to kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
769   // int gtid, kmp_task_t *task);
770   OMPRTL__kmpc_task_allow_completion_event,
771 };
772 
773 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
774 /// region.
775 class CleanupTy final : public EHScopeStack::Cleanup {
776   PrePostActionTy *Action;
777 
778 public:
779   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
780   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
781     if (!CGF.HaveInsertPoint())
782       return;
783     Action->Exit(CGF);
784   }
785 };
786 
787 } // anonymous namespace
788 
789 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
790   CodeGenFunction::RunCleanupsScope Scope(CGF);
791   if (PrePostAction) {
792     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
793     Callback(CodeGen, CGF, *PrePostAction);
794   } else {
795     PrePostActionTy Action;
796     Callback(CodeGen, CGF, Action);
797   }
798 }
799 
800 /// Check if the combiner is a call to UDR combiner and if it is so return the
801 /// UDR decl used for reduction.
802 static const OMPDeclareReductionDecl *
803 getReductionInit(const Expr *ReductionOp) {
804   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
805     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
806       if (const auto *DRE =
807               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
808         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
809           return DRD;
810   return nullptr;
811 }
812 
813 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
814                                              const OMPDeclareReductionDecl *DRD,
815                                              const Expr *InitOp,
816                                              Address Private, Address Original,
817                                              QualType Ty) {
818   if (DRD->getInitializer()) {
819     std::pair<llvm::Function *, llvm::Function *> Reduction =
820         CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
821     const auto *CE = cast<CallExpr>(InitOp);
822     const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
823     const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
824     const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
825     const auto *LHSDRE =
826         cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
827     const auto *RHSDRE =
828         cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
829     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
830     PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
831                             [=]() { return Private; });
832     PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
833                             [=]() { return Original; });
834     (void)PrivateScope.Privatize();
835     RValue Func = RValue::get(Reduction.second);
836     CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
837     CGF.EmitIgnoredExpr(InitOp);
838   } else {
839     llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
840     std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
841     auto *GV = new llvm::GlobalVariable(
842         CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
843         llvm::GlobalValue::PrivateLinkage, Init, Name);
844     LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
845     RValue InitRVal;
846     switch (CGF.getEvaluationKind(Ty)) {
847     case TEK_Scalar:
848       InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
849       break;
850     case TEK_Complex:
851       InitRVal =
852           RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
853       break;
854     case TEK_Aggregate:
855       InitRVal = RValue::getAggregate(LV.getAddress(CGF));
856       break;
857     }
858     OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
859     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
860     CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
861                          /*IsInitializer=*/false);
862   }
863 }
864 
/// Emit element-by-element initialization of the array at \p DestAddr.
/// The generated code is a loop that is skipped entirely when the array is
/// empty and otherwise initializes one element per iteration.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, each element is initialized with
/// emitInitWithReductionInitializer(); otherwise \p Init is emitted directly
/// into each destination element.
/// \param Init Initial expression of array.
/// \param DRD User-defined reduction declaration, may be null. When non-null,
/// \p SrcAddr is advanced in lockstep with the destination.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  // The source array is only used (and only cast) when a UDR is present.
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Compute the one-past-the-end pointer of the destination array.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop completely for an empty array.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  // EntryBB is remembered as the predecessor for the PHI nodes' initial
  // values.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI tracking the current source element; only created with a UDR.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  // PHI tracking the current destination element.
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit the initialization of the current element inside its own cleanups
  // scope.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the source address forward by one element.
    // NOTE: the IR value name says "dest" although this is the source
    // pointer; the name is runtime-visible in the emitted IR and is therefore
    // left unchanged here.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // The back edge comes from whichever block the element initialization
  // ended in, which is not necessarily BodyBB.
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
953 
/// Emit the lvalue of the shared reduction item \p E by forwarding to
/// CodeGenFunction::EmitOMPSharedLValue().
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}
957 
958 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
959                                             const Expr *E) {
960   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
961     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
962   return LValue();
963 }
964 
965 void ReductionCodeGen::emitAggregateInitialization(
966     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
967     const OMPDeclareReductionDecl *DRD) {
968   // Emit VarDecl with copy init for arrays.
969   // Get the address of the original variable captured in current
970   // captured region.
971   const auto *PrivateVD =
972       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
973   bool EmitDeclareReductionInit =
974       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
975   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
976                        EmitDeclareReductionInit,
977                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
978                                                 : PrivateVD->getInit(),
979                        DRD, SharedLVal.getAddress(CGF));
980 }
981 
982 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
983                                    ArrayRef<const Expr *> Privates,
984                                    ArrayRef<const Expr *> ReductionOps) {
985   ClausesData.reserve(Shareds.size());
986   SharedAddresses.reserve(Shareds.size());
987   Sizes.reserve(Shareds.size());
988   BaseDecls.reserve(Shareds.size());
989   auto IPriv = Privates.begin();
990   auto IRed = ReductionOps.begin();
991   for (const Expr *Ref : Shareds) {
992     ClausesData.emplace_back(Ref, *IPriv, *IRed);
993     std::advance(IPriv, 1);
994     std::advance(IRed, 1);
995   }
996 }
997 
998 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
999   assert(SharedAddresses.size() == N &&
1000          "Number of generated lvalues must be exactly N.");
1001   LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
1002   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
1003   SharedAddresses.emplace_back(First, Second);
1004 }
1005 
/// Compute the size of reduction item \p N, append it to Sizes and, for
/// variably modified private types, emit the private type with its size
/// expression bound to the computed element count.
///
/// For a private type that is not variably modified, the entry appended to
/// Sizes is (size of the shared item's non-reference type, nullptr).
/// Otherwise the entry is (size in chars, number of elements).
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Statically sized item: no element count is recorded.
    Sizes.emplace_back(
        CGF.getTypeSize(
            SharedAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  // Element type is taken from the pointee type of the lower-bound pointer.
  auto *ElemType = cast<llvm::PointerType>(
                       SharedAddresses[N].first.getPointer(CGF)->getType())
                       ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Array section: element count is (upper bound - lower bound) + 1, and
    // the byte size is that count times the element size.
    Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(CGF),
                                     SharedAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole object: byte size comes from the type, element count is the byte
    // size divided by the element size.
    SizeInChars = CGF.getTypeSize(
        SharedAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the opaque size expression of the private variable array type to
  // the computed element count while the type is emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
1043 
1044 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
1045                                          llvm::Value *Size) {
1046   const auto *PrivateVD =
1047       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1048   QualType PrivateType = PrivateVD->getType();
1049   if (!PrivateType->isVariablyModifiedType()) {
1050     assert(!Size && !Sizes[N].second &&
1051            "Size should be nullptr for non-variably modified reduction "
1052            "items.");
1053     return;
1054   }
1055   CodeGenFunction::OpaqueValueMapping OpaqueMap(
1056       CGF,
1057       cast<OpaqueValueExpr>(
1058           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
1059       RValue::get(Size));
1060   CGF.EmitVariablyModifiedType(PrivateType);
1061 }
1062 
1063 void ReductionCodeGen::emitInitialization(
1064     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
1065     llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
1066   assert(SharedAddresses.size() > N && "No variable was generated");
1067   const auto *PrivateVD =
1068       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1069   const OMPDeclareReductionDecl *DRD =
1070       getReductionInit(ClausesData[N].ReductionOp);
1071   QualType PrivateType = PrivateVD->getType();
1072   PrivateAddr = CGF.Builder.CreateElementBitCast(
1073       PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1074   QualType SharedType = SharedAddresses[N].first.getType();
1075   SharedLVal = CGF.MakeAddrLValue(
1076       CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
1077                                        CGF.ConvertTypeForMem(SharedType)),
1078       SharedType, SharedAddresses[N].first.getBaseInfo(),
1079       CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
1080   if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
1081     emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
1082   } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
1083     emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
1084                                      PrivateAddr, SharedLVal.getAddress(CGF),
1085                                      SharedLVal.getType());
1086   } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
1087              !CGF.isTrivialInitializer(PrivateVD->getInit())) {
1088     CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
1089                          PrivateVD->getType().getQualifiers(),
1090                          /*IsInitializer=*/false);
1091   }
1092 }
1093 
1094 bool ReductionCodeGen::needCleanups(unsigned N) {
1095   const auto *PrivateVD =
1096       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1097   QualType PrivateType = PrivateVD->getType();
1098   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1099   return DTorKind != QualType::DK_none;
1100 }
1101 
1102 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
1103                                     Address PrivateAddr) {
1104   const auto *PrivateVD =
1105       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1106   QualType PrivateType = PrivateVD->getType();
1107   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1108   if (needCleanups(N)) {
1109     PrivateAddr = CGF.Builder.CreateElementBitCast(
1110         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1111     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
1112   }
1113 }
1114 
1115 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
1116                           LValue BaseLV) {
1117   BaseTy = BaseTy.getNonReferenceType();
1118   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1119          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
1120     if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
1121       BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
1122     } else {
1123       LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
1124       BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
1125     }
1126     BaseTy = BaseTy->getPointeeType();
1127   }
1128   return CGF.MakeAddrLValue(
1129       CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
1130                                        CGF.ConvertTypeForMem(ElTy)),
1131       BaseLV.getType(), BaseLV.getBaseInfo(),
1132       CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
1133 }
1134 
/// Wrap \p Addr in as many levels of indirection as \p BaseTy has pointer or
/// reference levels before reaching \p ElTy.
///
/// For every such level a memory temporary of that level's type is created;
/// each outer temporary stores the pointer to the next inner one, and the
/// innermost temporary stores \p Addr cast to its element type. The
/// outermost temporary is returned. If no level is peeled, \p Addr is cast to
/// \p BaseLVType and returned with alignment \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  // Tmp: temporary created in the current iteration (innermost so far).
  // TopTmp: temporary created in the previous iteration.
  // MostTopTmp: the very first (outermost) temporary.
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    // Link the previous level to the new one, or remember the first level.
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  // Cast to what the innermost temporary holds, or to the base lvalue type
  // when no temporary was needed.
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
1162 
1163 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
1164   const VarDecl *OrigVD = nullptr;
1165   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
1166     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
1167     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
1168       Base = TempOASE->getBase()->IgnoreParenImpCasts();
1169     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1170       Base = TempASE->getBase()->IgnoreParenImpCasts();
1171     DE = cast<DeclRefExpr>(Base);
1172     OrigVD = cast<VarDecl>(DE->getDecl());
1173   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
1174     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
1175     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1176       Base = TempASE->getBase()->IgnoreParenImpCasts();
1177     DE = cast<DeclRefExpr>(Base);
1178     OrigVD = cast<VarDecl>(DE->getDecl());
1179   }
1180   return OrigVD;
1181 }
1182 
1183 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
1184                                                Address PrivateAddr) {
1185   const DeclRefExpr *DE;
1186   if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
1187     BaseDecls.emplace_back(OrigVD);
1188     LValue OriginalBaseLValue = CGF.EmitLValue(DE);
1189     LValue BaseLValue =
1190         loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1191                     OriginalBaseLValue);
1192     llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1193         BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
1194     llvm::Value *PrivatePointer =
1195         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1196             PrivateAddr.getPointer(),
1197             SharedAddresses[N].first.getAddress(CGF).getType());
1198     llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
1199     return castToBase(CGF, OrigVD->getType(),
1200                       SharedAddresses[N].first.getType(),
1201                       OriginalBaseLValue.getAddress(CGF).getType(),
1202                       OriginalBaseLValue.getAlignment(), Ptr);
1203   }
1204   BaseDecls.emplace_back(
1205       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1206   return PrivateAddr;
1207 }
1208 
1209 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1210   const OMPDeclareReductionDecl *DRD =
1211       getReductionInit(ClausesData[N].ReductionOp);
1212   return DRD && DRD->getInitializer();
1213 }
1214 
1215 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1216   return CGF.EmitLoadOfPointerLValue(
1217       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1218       getThreadIDVariable()->getType()->castAs<PointerType>());
1219 }
1220 
1221 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
1222   if (!CGF.HaveInsertPoint())
1223     return;
1224   // 1.2.2 OpenMP Language Terminology
1225   // Structured block - An executable statement with a single entry at the
1226   // top and a single exit at the bottom.
1227   // The point of exit cannot be a branch out of the structured block.
1228   // longjmp() and throw() must not violate the entry/exit criteria.
1229   CGF.EHStack.pushTerminate();
1230   CodeGen(CGF);
1231   CGF.EHStack.popTerminate();
1232 }
1233 
1234 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1235     CodeGenFunction &CGF) {
1236   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1237                             getThreadIDVariable()->getType(),
1238                             AlignmentSource::Decl);
1239 }
1240 
1241 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1242                                        QualType FieldTy) {
1243   auto *Field = FieldDecl::Create(
1244       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1245       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1246       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1247   Field->setAccess(AS_public);
1248   DC->addDecl(Field);
1249   return Field;
1250 }
1251 
1252 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
1253                                  StringRef Separator)
1254     : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
1255       OffloadEntriesInfoManager(CGM) {
1256   ASTContext &C = CGM.getContext();
1257   RecordDecl *RD = C.buildImplicitRecord("ident_t");
1258   QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1259   RD->startDefinition();
1260   // reserved_1
1261   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1262   // flags
1263   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1264   // reserved_2
1265   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1266   // reserved_3
1267   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1268   // psource
1269   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
1270   RD->completeDefinition();
1271   IdentQTy = C.getRecordType(RD);
1272   IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
1273   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1274 
1275   loadOffloadInfoMetadata();
1276 }
1277 
1278 void CGOpenMPRuntime::clear() {
1279   InternalVars.clear();
1280   // Clean non-target variable declarations possibly used only in debug info.
1281   for (const auto &Data : EmittedNonTargetVariables) {
1282     if (!Data.getValue().pointsToAliveValue())
1283       continue;
1284     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1285     if (!GV)
1286       continue;
1287     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1288       continue;
1289     GV->eraseFromParent();
1290   }
1291 }
1292 
1293 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1294   SmallString<128> Buffer;
1295   llvm::raw_svector_ostream OS(Buffer);
1296   StringRef Sep = FirstSeparator;
1297   for (StringRef Part : Parts) {
1298     OS << Sep << Part;
1299     Sep = Separator;
1300   }
1301   return std::string(OS.str());
1302 }
1303 
/// Emit the internal helper function implementing either the combiner or
/// the initializer of a user-defined reduction over type \p Ty.
///
/// The function returns void and takes two restrict-qualified pointers to
/// \p Ty; the parameter created for \p Out comes first, the one for \p In
/// second. Inside the body, \p In and \p Out are remapped to the pointees of
/// these parameters.
/// \param CombinerInitializer Expression to emit in the body; may be null,
/// in which case no expression is emitted.
/// \param In Variable mapped to the pointee of the second parameter.
/// \param Out Variable mapped to the pointee of the first parameter.
/// \param IsCombiner Selects the function name ("omp_combiner" vs.
/// "omp_initializer"); for initializers, a non-trivial initializer of
/// \p Out is additionally emitted into \p Out before \p CombinerInitializer.
/// \returns The emitted function.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *out, Ty *in);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  // Parameter order: out first, then in.
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // When optimizing, force the helper to be inlined into its callers.
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // Initializer only: run the non-trivial initializer of the Out variable
  // before the initializer expression itself.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1360 
1361 void CGOpenMPRuntime::emitUserDefinedReduction(
1362     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1363   if (UDRMap.count(D) > 0)
1364     return;
1365   llvm::Function *Combiner = emitCombinerOrInitializer(
1366       CGM, D->getType(), D->getCombiner(),
1367       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1368       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1369       /*IsCombiner=*/true);
1370   llvm::Function *Initializer = nullptr;
1371   if (const Expr *Init = D->getInitializer()) {
1372     Initializer = emitCombinerOrInitializer(
1373         CGM, D->getType(),
1374         D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1375                                                                      : nullptr,
1376         cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1377         cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1378         /*IsCombiner=*/false);
1379   }
1380   UDRMap.try_emplace(D, Combiner, Initializer);
1381   if (CGF) {
1382     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1383     Decls.second.push_back(D);
1384   }
1385 }
1386 
1387 std::pair<llvm::Function *, llvm::Function *>
1388 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1389   auto I = UDRMap.find(D);
1390   if (I != UDRMap.end())
1391     return I->second;
1392   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1393   return UDRMap.lookup(D);
1394 }
1395 
1396 namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
// The constructor pushes a finalization callback for 'parallel' onto the
// builder and the destructor pops it again; with a null builder both are
// no-ops.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of Clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    //
    // The callback captures CGF by reference and is stored in the builder
    // until the destructor pops it.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      // Emit the branch at IP; the guard restores the builder's previous
      // insertion point afterwards.
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI(
        {FiniCB, OMPD_parallel, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  // Builder the callback was pushed on; null if no builder is in use.
  llvm::OpenMPIRBuilder *OMPBuilder;
};
1440 } // namespace
1441 
1442 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1443     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1444     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1445     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1446   assert(ThreadIDVar->getType()->isPointerType() &&
1447          "thread id variable must be of type kmp_int32 *");
1448   CodeGenFunction CGF(CGM, true);
1449   bool HasCancel = false;
1450   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1451     HasCancel = OPD->hasCancel();
1452   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1453     HasCancel = OPSD->hasCancel();
1454   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1455     HasCancel = OPFD->hasCancel();
1456   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1457     HasCancel = OPFD->hasCancel();
1458   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1459     HasCancel = OPFD->hasCancel();
1460   else if (const auto *OPFD =
1461                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1462     HasCancel = OPFD->hasCancel();
1463   else if (const auto *OPFD =
1464                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1465     HasCancel = OPFD->hasCancel();
1466 
1467   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1468   //       parallel region to make cancellation barriers work properly.
1469   llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder();
1470   PushAndPopStackRAII PSR(OMPBuilder, CGF, HasCancel);
1471   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1472                                     HasCancel, OutlinedHelperName);
1473   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1474   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1475 }
1476 
1477 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1478     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1479     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1480   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1481   return emitParallelOrTeamsOutlinedFunction(
1482       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1483 }
1484 
1485 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1486     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1487     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1488   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1489   return emitParallelOrTeamsOutlinedFunction(
1490       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1491 }
1492 
1493 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
1494     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1495     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1496     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1497     bool Tied, unsigned &NumberOfParts) {
1498   auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1499                                               PrePostActionTy &) {
1500     llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1501     llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1502     llvm::Value *TaskArgs[] = {
1503         UpLoc, ThreadID,
1504         CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1505                                     TaskTVar->getType()->castAs<PointerType>())
1506             .getPointer(CGF)};
1507     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
1508   };
1509   CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1510                                                             UntiedCodeGen);
1511   CodeGen.setAction(Action);
1512   assert(!ThreadIDVar->getType()->isPointerType() &&
1513          "thread id variable must be of type kmp_int32 for tasks");
1514   const OpenMPDirectiveKind Region =
1515       isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1516                                                       : OMPD_task;
1517   const CapturedStmt *CS = D.getCapturedStmt(Region);
1518   bool HasCancel = false;
1519   if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
1520     HasCancel = TD->hasCancel();
1521   else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
1522     HasCancel = TD->hasCancel();
1523   else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
1524     HasCancel = TD->hasCancel();
1525   else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
1526     HasCancel = TD->hasCancel();
1527 
1528   CodeGenFunction CGF(CGM, true);
1529   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1530                                         InnermostKind, HasCancel, Action);
1531   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1532   llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
1533   if (!Tied)
1534     NumberOfParts = Action.getNumberOfParts();
1535   return Res;
1536 }
1537 
1538 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1539                              const RecordDecl *RD, const CGRecordLayout &RL,
1540                              ArrayRef<llvm::Constant *> Data) {
1541   llvm::StructType *StructTy = RL.getLLVMType();
1542   unsigned PrevIdx = 0;
1543   ConstantInitBuilder CIBuilder(CGM);
1544   auto DI = Data.begin();
1545   for (const FieldDecl *FD : RD->fields()) {
1546     unsigned Idx = RL.getLLVMFieldNo(FD);
1547     // Fill the alignment.
1548     for (unsigned I = PrevIdx; I < Idx; ++I)
1549       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1550     PrevIdx = Idx + 1;
1551     Fields.add(*DI);
1552     ++DI;
1553   }
1554 }
1555 
1556 template <class... As>
1557 static llvm::GlobalVariable *
1558 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1559                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1560                    As &&... Args) {
1561   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1562   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1563   ConstantInitBuilder CIBuilder(CGM);
1564   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1565   buildStructValue(Fields, CGM, RD, RL, Data);
1566   return Fields.finishAndCreateGlobal(
1567       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1568       std::forward<As>(Args)...);
1569 }
1570 
1571 template <typename T>
1572 static void
1573 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1574                                          ArrayRef<llvm::Constant *> Data,
1575                                          T &Parent) {
1576   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1577   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1578   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1579   buildStructValue(Fields, CGM, RD, RL, Data);
1580   Fields.finishAndAddTo(Parent);
1581 }
1582 
1583 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
1584   CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
1585   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1586   FlagsTy FlagsKey(Flags, Reserved2Flags);
1587   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
1588   if (!Entry) {
1589     if (!DefaultOpenMPPSource) {
1590       // Initialize default location for psource field of ident_t structure of
1591       // all ident_t objects. Format is ";file;function;line;column;;".
1592       // Taken from
1593       // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
1594       DefaultOpenMPPSource =
1595           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
1596       DefaultOpenMPPSource =
1597           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
1598     }
1599 
1600     llvm::Constant *Data[] = {
1601         llvm::ConstantInt::getNullValue(CGM.Int32Ty),
1602         llvm::ConstantInt::get(CGM.Int32Ty, Flags),
1603         llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
1604         llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
1605     llvm::GlobalValue *DefaultOpenMPLocation =
1606         createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
1607                            llvm::GlobalValue::PrivateLinkage);
1608     DefaultOpenMPLocation->setUnnamedAddr(
1609         llvm::GlobalValue::UnnamedAddr::Global);
1610 
1611     OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
1612   }
1613   return Address(Entry, Align);
1614 }
1615 
1616 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1617                                              bool AtCurrentPoint) {
1618   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1619   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1620 
1621   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1622   if (AtCurrentPoint) {
1623     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1624         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1625   } else {
1626     Elem.second.ServiceInsertPt =
1627         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1628     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1629   }
1630 }
1631 
1632 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1633   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1634   if (Elem.second.ServiceInsertPt) {
1635     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1636     Elem.second.ServiceInsertPt = nullptr;
1637     Ptr->eraseFromParent();
1638   }
1639 }
1640 
/// Returns a pointer to an ident_t location object describing \p Loc.
///
/// OMP_IDENT_KMPC is always added to \p Flags. If no debug info is generated
/// or \p Loc is invalid, the shared default location global for the flags is
/// returned. Otherwise a per-function temporary ".kmpc_loc.addr" is used: it
/// is created and initialized from the default location on first use in the
/// function, and on every call its psource field is overwritten with a
/// ";file;function;line;column;;" string built from \p Loc.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  // Reuse the location temporary already recorded for this function, if any.
  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  Address LocValue = Address::invalid();
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, Align);

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // getThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    // Initialize the temporary at the function's service insert point by
    // copying the default location; the guard restores the builder's insert
    // point when this scope ends.
    // NOTE(review): the copy happens only when the temporary is first
    // created, so the Flags of later calls in the same function do not appear
    // to be written into it - confirm this is intended.
    if (!Elem.second.ServiceInsertPt)
      setLocThreadIdInsertPt(CGF);
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGF.getTypeSize(IdentQTy));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
  LValue PSource =
      CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));

  // The location string is cached per raw source-location encoding, so it is
  // built at most once for each distinct Loc.
  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    // The function component stays empty when the current declaration is not
    // a FunctionDecl (or there is none).
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      OS2 << FD->getQualifiedNameAsString();
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}
1701 
/// Returns the OpenMP global thread id value to use in the current function.
///
/// Sources are tried in this order:
///  1. the thread id already cached for the function in OpenMPLocThreadIDMap;
///  2. a load from the thread id variable of the enclosing OpenMP region,
///     when the conditions checked below allow it;
///  3. a call to __kmpc_global_thread_num emitted at the function's service
///     insert point, which is then cached for the function.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Load from the variable if no landing pad is required, or C++
      // exceptions are disabled, or we are emitting into TopBlock, or the
      // variable's pointer is not an instruction, or that instruction lives
      // in TopBlock or in the current insert block.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // Either this is not an outlined function region, or the thread id variable
  // could not be used above - need to call
  // kmp_int32 __kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the call at the service insert point; the guard restores the
  // builder's previous insert point on return.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1758 
1759 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1760   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1761   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1762     clearLocThreadIdInsertPt(CGF);
1763     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1764   }
1765   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1766     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1767       UDRMap.erase(D);
1768     FunctionUDRMap.erase(CGF.CurFn);
1769   }
1770   auto I = FunctionUDMMap.find(CGF.CurFn);
1771   if (I != FunctionUDMMap.end()) {
1772     for(const auto *D : I->second)
1773       UDMMap.erase(D);
1774     FunctionUDMMap.erase(I);
1775   }
1776   LastprivateConditionalToTypes.erase(CGF.CurFn);
1777 }
1778 
1779 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1780   return IdentTy->getPointerTo();
1781 }
1782 
1783 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1784   if (!Kmpc_MicroTy) {
1785     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1786     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1787                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1788     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1789   }
1790   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1791 }
1792 
1793 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1794   llvm::FunctionCallee RTLFn = nullptr;
1795   switch (static_cast<OpenMPRTLFunction>(Function)) {
1796   case OMPRTL__kmpc_fork_call: {
1797     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1798     // microtask, ...);
1799     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1800                                 getKmpc_MicroPointerTy()};
1801     auto *FnTy =
1802         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1803     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1804     if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
1805       if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
1806         llvm::LLVMContext &Ctx = F->getContext();
1807         llvm::MDBuilder MDB(Ctx);
1808         // Annotate the callback behavior of the __kmpc_fork_call:
1809         //  - The callback callee is argument number 2 (microtask).
1810         //  - The first two arguments of the callback callee are unknown (-1).
1811         //  - All variadic arguments to the __kmpc_fork_call are passed to the
1812         //    callback callee.
1813         F->addMetadata(
1814             llvm::LLVMContext::MD_callback,
1815             *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
1816                                         2, {-1, -1},
1817                                         /* VarArgsArePassed */ true)}));
1818       }
1819     }
1820     break;
1821   }
1822   case OMPRTL__kmpc_global_thread_num: {
1823     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1824     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1825     auto *FnTy =
1826         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1827     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1828     break;
1829   }
1830   case OMPRTL__kmpc_threadprivate_cached: {
1831     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1832     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1833     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1834                                 CGM.VoidPtrTy, CGM.SizeTy,
1835                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1836     auto *FnTy =
1837         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1838     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1839     break;
1840   }
1841   case OMPRTL__kmpc_critical: {
1842     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1843     // kmp_critical_name *crit);
1844     llvm::Type *TypeParams[] = {
1845         getIdentTyPointerTy(), CGM.Int32Ty,
1846         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1847     auto *FnTy =
1848         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1849     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1850     break;
1851   }
1852   case OMPRTL__kmpc_critical_with_hint: {
1853     // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1854     // kmp_critical_name *crit, uintptr_t hint);
1855     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1856                                 llvm::PointerType::getUnqual(KmpCriticalNameTy),
1857                                 CGM.IntPtrTy};
1858     auto *FnTy =
1859         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1860     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1861     break;
1862   }
1863   case OMPRTL__kmpc_threadprivate_register: {
1864     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1865     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1866     // typedef void *(*kmpc_ctor)(void *);
1867     auto *KmpcCtorTy =
1868         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1869                                 /*isVarArg*/ false)->getPointerTo();
1870     // typedef void *(*kmpc_cctor)(void *, void *);
1871     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1872     auto *KmpcCopyCtorTy =
1873         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1874                                 /*isVarArg*/ false)
1875             ->getPointerTo();
1876     // typedef void (*kmpc_dtor)(void *);
1877     auto *KmpcDtorTy =
1878         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1879             ->getPointerTo();
1880     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1881                               KmpcCopyCtorTy, KmpcDtorTy};
1882     auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1883                                         /*isVarArg*/ false);
1884     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1885     break;
1886   }
1887   case OMPRTL__kmpc_end_critical: {
1888     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1889     // kmp_critical_name *crit);
1890     llvm::Type *TypeParams[] = {
1891         getIdentTyPointerTy(), CGM.Int32Ty,
1892         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1893     auto *FnTy =
1894         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1895     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1896     break;
1897   }
1898   case OMPRTL__kmpc_cancel_barrier: {
1899     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1900     // global_tid);
1901     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1902     auto *FnTy =
1903         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1904     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1905     break;
1906   }
1907   case OMPRTL__kmpc_barrier: {
1908     // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1909     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1910     auto *FnTy =
1911         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1912     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1913     break;
1914   }
1915   case OMPRTL__kmpc_for_static_fini: {
1916     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1917     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1918     auto *FnTy =
1919         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1920     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1921     break;
1922   }
1923   case OMPRTL__kmpc_push_num_threads: {
1924     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1925     // kmp_int32 num_threads)
1926     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1927                                 CGM.Int32Ty};
1928     auto *FnTy =
1929         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1930     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1931     break;
1932   }
1933   case OMPRTL__kmpc_serialized_parallel: {
1934     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1935     // global_tid);
1936     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1937     auto *FnTy =
1938         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1939     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1940     break;
1941   }
1942   case OMPRTL__kmpc_end_serialized_parallel: {
1943     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1944     // global_tid);
1945     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1946     auto *FnTy =
1947         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1948     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1949     break;
1950   }
1951   case OMPRTL__kmpc_flush: {
1952     // Build void __kmpc_flush(ident_t *loc);
1953     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1954     auto *FnTy =
1955         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1956     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1957     break;
1958   }
1959   case OMPRTL__kmpc_master: {
1960     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1961     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1962     auto *FnTy =
1963         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1964     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
1965     break;
1966   }
1967   case OMPRTL__kmpc_end_master: {
1968     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
1969     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1970     auto *FnTy =
1971         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1972     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
1973     break;
1974   }
1975   case OMPRTL__kmpc_omp_taskyield: {
1976     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
1977     // int end_part);
1978     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1979     auto *FnTy =
1980         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1981     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
1982     break;
1983   }
1984   case OMPRTL__kmpc_single: {
1985     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
1986     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1987     auto *FnTy =
1988         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1989     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
1990     break;
1991   }
1992   case OMPRTL__kmpc_end_single: {
1993     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
1994     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1995     auto *FnTy =
1996         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1997     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
1998     break;
1999   }
2000   case OMPRTL__kmpc_omp_task_alloc: {
2001     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
2002     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
2003     // kmp_routine_entry_t *task_entry);
2004     assert(KmpRoutineEntryPtrTy != nullptr &&
2005            "Type kmp_routine_entry_t must be created.");
2006     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2007                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
2008     // Return void * and then cast to particular kmp_task_t type.
2009     auto *FnTy =
2010         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2011     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
2012     break;
2013   }
2014   case OMPRTL__kmpc_omp_target_task_alloc: {
2015     // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32 gtid,
2016     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
2017     // kmp_routine_entry_t *task_entry, kmp_int64 device_id);
2018     assert(KmpRoutineEntryPtrTy != nullptr &&
2019            "Type kmp_routine_entry_t must be created.");
2020     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2021                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy,
2022                                 CGM.Int64Ty};
2023     // Return void * and then cast to particular kmp_task_t type.
2024     auto *FnTy =
2025         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2026     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_target_task_alloc");
2027     break;
2028   }
2029   case OMPRTL__kmpc_omp_task: {
2030     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2031     // *new_task);
2032     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2033                                 CGM.VoidPtrTy};
2034     auto *FnTy =
2035         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2036     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
2037     break;
2038   }
2039   case OMPRTL__kmpc_copyprivate: {
2040     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
2041     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
2042     // kmp_int32 didit);
2043     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2044     auto *CpyFnTy =
2045         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
2046     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
2047                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
2048                                 CGM.Int32Ty};
2049     auto *FnTy =
2050         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2051     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
2052     break;
2053   }
2054   case OMPRTL__kmpc_reduce: {
2055     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
2056     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
2057     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
2058     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2059     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
2060                                                /*isVarArg=*/false);
2061     llvm::Type *TypeParams[] = {
2062         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
2063         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
2064         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2065     auto *FnTy =
2066         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2067     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
2068     break;
2069   }
2070   case OMPRTL__kmpc_reduce_nowait: {
2071     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
2072     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
2073     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
2074     // *lck);
2075     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2076     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
2077                                                /*isVarArg=*/false);
2078     llvm::Type *TypeParams[] = {
2079         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
2080         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
2081         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2082     auto *FnTy =
2083         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2084     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
2085     break;
2086   }
2087   case OMPRTL__kmpc_end_reduce: {
2088     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
2089     // kmp_critical_name *lck);
2090     llvm::Type *TypeParams[] = {
2091         getIdentTyPointerTy(), CGM.Int32Ty,
2092         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2093     auto *FnTy =
2094         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2095     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
2096     break;
2097   }
2098   case OMPRTL__kmpc_end_reduce_nowait: {
2099     // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
2100     // kmp_critical_name *lck);
2101     llvm::Type *TypeParams[] = {
2102         getIdentTyPointerTy(), CGM.Int32Ty,
2103         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2104     auto *FnTy =
2105         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2106     RTLFn =
2107         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
2108     break;
2109   }
2110   case OMPRTL__kmpc_omp_task_begin_if0: {
2111     // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2112     // *new_task);
2113     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2114                                 CGM.VoidPtrTy};
2115     auto *FnTy =
2116         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2117     RTLFn =
2118         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
2119     break;
2120   }
2121   case OMPRTL__kmpc_omp_task_complete_if0: {
2122     // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2123     // *new_task);
2124     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2125                                 CGM.VoidPtrTy};
2126     auto *FnTy =
2127         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2128     RTLFn = CGM.CreateRuntimeFunction(FnTy,
2129                                       /*Name=*/"__kmpc_omp_task_complete_if0");
2130     break;
2131   }
2132   case OMPRTL__kmpc_ordered: {
2133     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
2134     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2135     auto *FnTy =
2136         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2137     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
2138     break;
2139   }
2140   case OMPRTL__kmpc_end_ordered: {
2141     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
2142     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2143     auto *FnTy =
2144         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2145     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
2146     break;
2147   }
2148   case OMPRTL__kmpc_omp_taskwait: {
2149     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
2150     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2151     auto *FnTy =
2152         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2153     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
2154     break;
2155   }
2156   case OMPRTL__kmpc_taskgroup: {
2157     // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
2158     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2159     auto *FnTy =
2160         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2161     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
2162     break;
2163   }
2164   case OMPRTL__kmpc_end_taskgroup: {
2165     // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
2166     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2167     auto *FnTy =
2168         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2169     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
2170     break;
2171   }
2172   case OMPRTL__kmpc_push_proc_bind: {
2173     // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
2174     // int proc_bind)
2175     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2176     auto *FnTy =
2177         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2178     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
2179     break;
2180   }
2181   case OMPRTL__kmpc_omp_task_with_deps: {
2182     // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
2183     // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
2184     // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
2185     llvm::Type *TypeParams[] = {
2186         getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
2187         CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
2188     auto *FnTy =
2189         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2190     RTLFn =
2191         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
2192     break;
2193   }
2194   case OMPRTL__kmpc_omp_wait_deps: {
2195     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
2196     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
2197     // kmp_depend_info_t *noalias_dep_list);
2198     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2199                                 CGM.Int32Ty,           CGM.VoidPtrTy,
2200                                 CGM.Int32Ty,           CGM.VoidPtrTy};
2201     auto *FnTy =
2202         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2203     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
2204     break;
2205   }
2206   case OMPRTL__kmpc_cancellationpoint: {
2207     // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
2208     // global_tid, kmp_int32 cncl_kind)
2209     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2210     auto *FnTy =
2211         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2212     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
2213     break;
2214   }
2215   case OMPRTL__kmpc_cancel: {
2216     // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
2217     // kmp_int32 cncl_kind)
2218     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2219     auto *FnTy =
2220         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2221     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
2222     break;
2223   }
2224   case OMPRTL__kmpc_push_num_teams: {
2225     // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid,
2226     // kmp_int32 num_teams, kmp_int32 num_threads)
2227     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2228         CGM.Int32Ty};
2229     auto *FnTy =
2230         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2231     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
2232     break;
2233   }
2234   case OMPRTL__kmpc_fork_teams: {
2235     // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
2236     // microtask, ...);
2237     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2238                                 getKmpc_MicroPointerTy()};
2239     auto *FnTy =
2240         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
2241     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
2242     if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
2243       if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
2244         llvm::LLVMContext &Ctx = F->getContext();
2245         llvm::MDBuilder MDB(Ctx);
2246         // Annotate the callback behavior of the __kmpc_fork_teams:
2247         //  - The callback callee is argument number 2 (microtask).
2248         //  - The first two arguments of the callback callee are unknown (-1).
2249         //  - All variadic arguments to the __kmpc_fork_teams are passed to the
2250         //    callback callee.
2251         F->addMetadata(
2252             llvm::LLVMContext::MD_callback,
2253             *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
2254                                         2, {-1, -1},
2255                                         /* VarArgsArePassed */ true)}));
2256       }
2257     }
2258     break;
2259   }
2260   case OMPRTL__kmpc_taskloop: {
2261     // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
2262     // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
2263     // sched, kmp_uint64 grainsize, void *task_dup);
2264     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2265                                 CGM.IntTy,
2266                                 CGM.VoidPtrTy,
2267                                 CGM.IntTy,
2268                                 CGM.Int64Ty->getPointerTo(),
2269                                 CGM.Int64Ty->getPointerTo(),
2270                                 CGM.Int64Ty,
2271                                 CGM.IntTy,
2272                                 CGM.IntTy,
2273                                 CGM.Int64Ty,
2274                                 CGM.VoidPtrTy};
2275     auto *FnTy =
2276         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2277     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
2278     break;
2279   }
2280   case OMPRTL__kmpc_doacross_init: {
2281     // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
2282     // num_dims, struct kmp_dim *dims);
2283     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2284                                 CGM.Int32Ty,
2285                                 CGM.Int32Ty,
2286                                 CGM.VoidPtrTy};
2287     auto *FnTy =
2288         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2289     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
2290     break;
2291   }
2292   case OMPRTL__kmpc_doacross_fini: {
2293     // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
2294     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2295     auto *FnTy =
2296         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2297     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
2298     break;
2299   }
2300   case OMPRTL__kmpc_doacross_post: {
2301     // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
2302     // *vec);
2303     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2304                                 CGM.Int64Ty->getPointerTo()};
2305     auto *FnTy =
2306         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2307     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
2308     break;
2309   }
2310   case OMPRTL__kmpc_doacross_wait: {
2311     // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
2312     // *vec);
2313     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2314                                 CGM.Int64Ty->getPointerTo()};
2315     auto *FnTy =
2316         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2317     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
2318     break;
2319   }
2320   case OMPRTL__kmpc_task_reduction_init: {
2321     // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
2322     // *data);
2323     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
2324     auto *FnTy =
2325         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2326     RTLFn =
2327         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
2328     break;
2329   }
2330   case OMPRTL__kmpc_task_reduction_get_th_data: {
2331     // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
2332     // *d);
2333     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2334     auto *FnTy =
2335         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2336     RTLFn = CGM.CreateRuntimeFunction(
2337         FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
2338     break;
2339   }
2340   case OMPRTL__kmpc_alloc: {
2341     // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t
2342     // al); omp_allocator_handle_t type is void *.
2343     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy};
2344     auto *FnTy =
2345         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2346     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc");
2347     break;
2348   }
2349   case OMPRTL__kmpc_free: {
2350     // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t
2351     // al); omp_allocator_handle_t type is void *.
2352     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2353     auto *FnTy =
2354         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2355     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free");
2356     break;
2357   }
2358   case OMPRTL__kmpc_push_target_tripcount: {
2359     // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
2360     // size);
2361     llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty};
2362     llvm::FunctionType *FnTy =
2363         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2364     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount");
2365     break;
2366   }
2367   case OMPRTL__tgt_target: {
2368     // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
2369     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2370     // *arg_types);
2371     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2372                                 CGM.VoidPtrTy,
2373                                 CGM.Int32Ty,
2374                                 CGM.VoidPtrPtrTy,
2375                                 CGM.VoidPtrPtrTy,
2376                                 CGM.Int64Ty->getPointerTo(),
2377                                 CGM.Int64Ty->getPointerTo()};
2378     auto *FnTy =
2379         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2380     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
2381     break;
2382   }
2383   case OMPRTL__tgt_target_nowait: {
2384     // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
2385     // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2386     // int64_t *arg_types);
2387     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2388                                 CGM.VoidPtrTy,
2389                                 CGM.Int32Ty,
2390                                 CGM.VoidPtrPtrTy,
2391                                 CGM.VoidPtrPtrTy,
2392                                 CGM.Int64Ty->getPointerTo(),
2393                                 CGM.Int64Ty->getPointerTo()};
2394     auto *FnTy =
2395         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2396     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
2397     break;
2398   }
2399   case OMPRTL__tgt_target_teams: {
2400     // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
2401     // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2402     // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2403     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2404                                 CGM.VoidPtrTy,
2405                                 CGM.Int32Ty,
2406                                 CGM.VoidPtrPtrTy,
2407                                 CGM.VoidPtrPtrTy,
2408                                 CGM.Int64Ty->getPointerTo(),
2409                                 CGM.Int64Ty->getPointerTo(),
2410                                 CGM.Int32Ty,
2411                                 CGM.Int32Ty};
2412     auto *FnTy =
2413         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2414     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
2415     break;
2416   }
2417   case OMPRTL__tgt_target_teams_nowait: {
2418     // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
2419     // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
2420     // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2421     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2422                                 CGM.VoidPtrTy,
2423                                 CGM.Int32Ty,
2424                                 CGM.VoidPtrPtrTy,
2425                                 CGM.VoidPtrPtrTy,
2426                                 CGM.Int64Ty->getPointerTo(),
2427                                 CGM.Int64Ty->getPointerTo(),
2428                                 CGM.Int32Ty,
2429                                 CGM.Int32Ty};
2430     auto *FnTy =
2431         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2432     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
2433     break;
2434   }
2435   case OMPRTL__tgt_register_requires: {
2436     // Build void __tgt_register_requires(int64_t flags);
2437     llvm::Type *TypeParams[] = {CGM.Int64Ty};
2438     auto *FnTy =
2439         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2440     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires");
2441     break;
2442   }
2443   case OMPRTL__tgt_target_data_begin: {
2444     // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
2445     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2446     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2447                                 CGM.Int32Ty,
2448                                 CGM.VoidPtrPtrTy,
2449                                 CGM.VoidPtrPtrTy,
2450                                 CGM.Int64Ty->getPointerTo(),
2451                                 CGM.Int64Ty->getPointerTo()};
2452     auto *FnTy =
2453         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2454     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
2455     break;
2456   }
2457   case OMPRTL__tgt_target_data_begin_nowait: {
2458     // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
2459     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2460     // *arg_types);
2461     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2462                                 CGM.Int32Ty,
2463                                 CGM.VoidPtrPtrTy,
2464                                 CGM.VoidPtrPtrTy,
2465                                 CGM.Int64Ty->getPointerTo(),
2466                                 CGM.Int64Ty->getPointerTo()};
2467     auto *FnTy =
2468         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2469     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
2470     break;
2471   }
2472   case OMPRTL__tgt_target_data_end: {
2473     // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
2474     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2475     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2476                                 CGM.Int32Ty,
2477                                 CGM.VoidPtrPtrTy,
2478                                 CGM.VoidPtrPtrTy,
2479                                 CGM.Int64Ty->getPointerTo(),
2480                                 CGM.Int64Ty->getPointerTo()};
2481     auto *FnTy =
2482         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2483     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
2484     break;
2485   }
2486   case OMPRTL__tgt_target_data_end_nowait: {
2487     // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
2488     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2489     // *arg_types);
2490     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2491                                 CGM.Int32Ty,
2492                                 CGM.VoidPtrPtrTy,
2493                                 CGM.VoidPtrPtrTy,
2494                                 CGM.Int64Ty->getPointerTo(),
2495                                 CGM.Int64Ty->getPointerTo()};
2496     auto *FnTy =
2497         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2498     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
2499     break;
2500   }
2501   case OMPRTL__tgt_target_data_update: {
2502     // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
2503     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2504     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2505                                 CGM.Int32Ty,
2506                                 CGM.VoidPtrPtrTy,
2507                                 CGM.VoidPtrPtrTy,
2508                                 CGM.Int64Ty->getPointerTo(),
2509                                 CGM.Int64Ty->getPointerTo()};
2510     auto *FnTy =
2511         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2512     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
2513     break;
2514   }
2515   case OMPRTL__tgt_target_data_update_nowait: {
2516     // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
2517     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2518     // *arg_types);
2519     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2520                                 CGM.Int32Ty,
2521                                 CGM.VoidPtrPtrTy,
2522                                 CGM.VoidPtrPtrTy,
2523                                 CGM.Int64Ty->getPointerTo(),
2524                                 CGM.Int64Ty->getPointerTo()};
2525     auto *FnTy =
2526         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2527     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
2528     break;
2529   }
2530   case OMPRTL__tgt_mapper_num_components: {
2531     // Build int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
2532     llvm::Type *TypeParams[] = {CGM.VoidPtrTy};
2533     auto *FnTy =
2534         llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false);
2535     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_mapper_num_components");
2536     break;
2537   }
2538   case OMPRTL__tgt_push_mapper_component: {
2539     // Build void __tgt_push_mapper_component(void *rt_mapper_handle, void
2540     // *base, void *begin, int64_t size, int64_t type);
2541     llvm::Type *TypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.VoidPtrTy,
2542                                 CGM.Int64Ty, CGM.Int64Ty};
2543     auto *FnTy =
2544         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2545     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_push_mapper_component");
2546     break;
2547   }
2548   case OMPRTL__kmpc_task_allow_completion_event: {
2549     // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
2550     // int gtid, kmp_task_t *task);
2551     auto *FnTy = llvm::FunctionType::get(
2552         CGM.VoidPtrTy, {getIdentTyPointerTy(), CGM.IntTy, CGM.VoidPtrTy},
2553         /*isVarArg=*/false);
2554     RTLFn =
2555         CGM.CreateRuntimeFunction(FnTy, "__kmpc_task_allow_completion_event");
2556     break;
2557   }
2558   }
2559   assert(RTLFn && "Unable to find OpenMP runtime function");
2560   return RTLFn;
2561 }
2562 
2563 llvm::FunctionCallee
2564 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
2565   assert((IVSize == 32 || IVSize == 64) &&
2566          "IV size is not compatible with the omp runtime");
2567   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
2568                                             : "__kmpc_for_static_init_4u")
2569                                 : (IVSigned ? "__kmpc_for_static_init_8"
2570                                             : "__kmpc_for_static_init_8u");
2571   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2572   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2573   llvm::Type *TypeParams[] = {
2574     getIdentTyPointerTy(),                     // loc
2575     CGM.Int32Ty,                               // tid
2576     CGM.Int32Ty,                               // schedtype
2577     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2578     PtrTy,                                     // p_lower
2579     PtrTy,                                     // p_upper
2580     PtrTy,                                     // p_stride
2581     ITy,                                       // incr
2582     ITy                                        // chunk
2583   };
2584   auto *FnTy =
2585       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2586   return CGM.CreateRuntimeFunction(FnTy, Name);
2587 }
2588 
2589 llvm::FunctionCallee
2590 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
2591   assert((IVSize == 32 || IVSize == 64) &&
2592          "IV size is not compatible with the omp runtime");
2593   StringRef Name =
2594       IVSize == 32
2595           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
2596           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
2597   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2598   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
2599                                CGM.Int32Ty,           // tid
2600                                CGM.Int32Ty,           // schedtype
2601                                ITy,                   // lower
2602                                ITy,                   // upper
2603                                ITy,                   // stride
2604                                ITy                    // chunk
2605   };
2606   auto *FnTy =
2607       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2608   return CGM.CreateRuntimeFunction(FnTy, Name);
2609 }
2610 
2611 llvm::FunctionCallee
2612 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
2613   assert((IVSize == 32 || IVSize == 64) &&
2614          "IV size is not compatible with the omp runtime");
2615   StringRef Name =
2616       IVSize == 32
2617           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
2618           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
2619   llvm::Type *TypeParams[] = {
2620       getIdentTyPointerTy(), // loc
2621       CGM.Int32Ty,           // tid
2622   };
2623   auto *FnTy =
2624       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2625   return CGM.CreateRuntimeFunction(FnTy, Name);
2626 }
2627 
2628 llvm::FunctionCallee
2629 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
2630   assert((IVSize == 32 || IVSize == 64) &&
2631          "IV size is not compatible with the omp runtime");
2632   StringRef Name =
2633       IVSize == 32
2634           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
2635           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
2636   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2637   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2638   llvm::Type *TypeParams[] = {
2639     getIdentTyPointerTy(),                     // loc
2640     CGM.Int32Ty,                               // tid
2641     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2642     PtrTy,                                     // p_lower
2643     PtrTy,                                     // p_upper
2644     PtrTy                                      // p_stride
2645   };
2646   auto *FnTy =
2647       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2648   return CGM.CreateRuntimeFunction(FnTy, Name);
2649 }
2650 
2651 /// Obtain information that uniquely identifies a target entry. This
2652 /// consists of the file and device IDs as well as line number associated with
2653 /// the relevant entry source location.
2654 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
2655                                      unsigned &DeviceID, unsigned &FileID,
2656                                      unsigned &LineNum) {
2657   SourceManager &SM = C.getSourceManager();
2658 
2659   // The loc should be always valid and have a file ID (the user cannot use
2660   // #pragma directives in macros)
2661 
2662   assert(Loc.isValid() && "Source location is expected to be always valid.");
2663 
2664   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
2665   assert(PLoc.isValid() && "Source location is expected to be always valid.");
2666 
2667   llvm::sys::fs::UniqueID ID;
2668   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
2669     SM.getDiagnostics().Report(diag::err_cannot_open_file)
2670         << PLoc.getFilename() << EC.message();
2671 
2672   DeviceID = ID.getDevice();
2673   FileID = ID.getFile();
2674   LineNum = PLoc.getLine();
2675 }
2676 
2677 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
2678   if (CGM.getLangOpts().OpenMPSimd)
2679     return Address::invalid();
2680   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
2681       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
2682   if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
2683               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
2684                HasRequiresUnifiedSharedMemory))) {
2685     SmallString<64> PtrName;
2686     {
2687       llvm::raw_svector_ostream OS(PtrName);
2688       OS << CGM.getMangledName(GlobalDecl(VD));
2689       if (!VD->isExternallyVisible()) {
2690         unsigned DeviceID, FileID, Line;
2691         getTargetEntryUniqueInfo(CGM.getContext(),
2692                                  VD->getCanonicalDecl()->getBeginLoc(),
2693                                  DeviceID, FileID, Line);
2694         OS << llvm::format("_%x", FileID);
2695       }
2696       OS << "_decl_tgt_ref_ptr";
2697     }
2698     llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
2699     if (!Ptr) {
2700       QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
2701       Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
2702                                         PtrName);
2703 
2704       auto *GV = cast<llvm::GlobalVariable>(Ptr);
2705       GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
2706 
2707       if (!CGM.getLangOpts().OpenMPIsDevice)
2708         GV->setInitializer(CGM.GetAddrOfGlobal(VD));
2709       registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
2710     }
2711     return Address(Ptr, CGM.getContext().getDeclAlign(VD));
2712   }
2713   return Address::invalid();
2714 }
2715 
2716 llvm::Constant *
2717 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
2718   assert(!CGM.getLangOpts().OpenMPUseTLS ||
2719          !CGM.getContext().getTargetInfo().isTLSSupported());
2720   // Lookup the entry, lazily creating it if necessary.
2721   std::string Suffix = getName({"cache", ""});
2722   return getOrCreateInternalVariable(
2723       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
2724 }
2725 
2726 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
2727                                                 const VarDecl *VD,
2728                                                 Address VDAddr,
2729                                                 SourceLocation Loc) {
2730   if (CGM.getLangOpts().OpenMPUseTLS &&
2731       CGM.getContext().getTargetInfo().isTLSSupported())
2732     return VDAddr;
2733 
2734   llvm::Type *VarTy = VDAddr.getElementType();
2735   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2736                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2737                                                        CGM.Int8PtrTy),
2738                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
2739                          getOrCreateThreadPrivateCache(VD)};
2740   return Address(CGF.EmitRuntimeCall(
2741       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2742                  VDAddr.getAlignment());
2743 }
2744 
2745 void CGOpenMPRuntime::emitThreadPrivateVarInit(
2746     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
2747     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
2748   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
2749   // library.
2750   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
2751   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
2752                       OMPLoc);
2753   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
2754   // to register constructor/destructor for variable.
2755   llvm::Value *Args[] = {
2756       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
2757       Ctor, CopyCtor, Dtor};
2758   CGF.EmitRuntimeCall(
2759       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
2760 }
2761 
2762 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
2763     const VarDecl *VD, Address VDAddr, SourceLocation Loc,
2764     bool PerformInit, CodeGenFunction *CGF) {
2765   if (CGM.getLangOpts().OpenMPUseTLS &&
2766       CGM.getContext().getTargetInfo().isTLSSupported())
2767     return nullptr;
2768 
2769   VD = VD->getDefinition(CGM.getContext());
2770   if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
2771     QualType ASTTy = VD->getType();
2772 
2773     llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
2774     const Expr *Init = VD->getAnyInitializer();
2775     if (CGM.getLangOpts().CPlusPlus && PerformInit) {
2776       // Generate function that re-emits the declaration's initializer into the
2777       // threadprivate copy of the variable VD
2778       CodeGenFunction CtorCGF(CGM);
2779       FunctionArgList Args;
2780       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
2781                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
2782                             ImplicitParamDecl::Other);
2783       Args.push_back(&Dst);
2784 
2785       const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
2786           CGM.getContext().VoidPtrTy, Args);
2787       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2788       std::string Name = getName({"__kmpc_global_ctor_", ""});
2789       llvm::Function *Fn =
2790           CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
2791       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
2792                             Args, Loc, Loc);
2793       llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
2794           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
2795           CGM.getContext().VoidPtrTy, Dst.getLocation());
2796       Address Arg = Address(ArgVal, VDAddr.getAlignment());
2797       Arg = CtorCGF.Builder.CreateElementBitCast(
2798           Arg, CtorCGF.ConvertTypeForMem(ASTTy));
2799       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
2800                                /*IsInitializer=*/true);
2801       ArgVal = CtorCGF.EmitLoadOfScalar(
2802           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
2803           CGM.getContext().VoidPtrTy, Dst.getLocation());
2804       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
2805       CtorCGF.FinishFunction();
2806       Ctor = Fn;
2807     }
2808     if (VD->getType().isDestructedType() != QualType::DK_none) {
2809       // Generate function that emits destructor call for the threadprivate copy
2810       // of the variable VD
2811       CodeGenFunction DtorCGF(CGM);
2812       FunctionArgList Args;
2813       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
2814                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
2815                             ImplicitParamDecl::Other);
2816       Args.push_back(&Dst);
2817 
2818       const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
2819           CGM.getContext().VoidTy, Args);
2820       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2821       std::string Name = getName({"__kmpc_global_dtor_", ""});
2822       llvm::Function *Fn =
2823           CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
2824       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
2825       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
2826                             Loc, Loc);
2827       // Create a scope with an artificial location for the body of this function.
2828       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
2829       llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
2830           DtorCGF.GetAddrOfLocalVar(&Dst),
2831           /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
2832       DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
2833                           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
2834                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
2835       DtorCGF.FinishFunction();
2836       Dtor = Fn;
2837     }
2838     // Do not emit init function if it is not required.
2839     if (!Ctor && !Dtor)
2840       return nullptr;
2841 
2842     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2843     auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
2844                                                /*isVarArg=*/false)
2845                            ->getPointerTo();
2846     // Copying constructor for the threadprivate variable.
2847     // Must be NULL - reserved by runtime, but currently it requires that this
2848     // parameter is always NULL. Otherwise it fires assertion.
2849     CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
2850     if (Ctor == nullptr) {
2851       auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
2852                                              /*isVarArg=*/false)
2853                          ->getPointerTo();
2854       Ctor = llvm::Constant::getNullValue(CtorTy);
2855     }
2856     if (Dtor == nullptr) {
2857       auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
2858                                              /*isVarArg=*/false)
2859                          ->getPointerTo();
2860       Dtor = llvm::Constant::getNullValue(DtorTy);
2861     }
2862     if (!CGF) {
2863       auto *InitFunctionTy =
2864           llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
2865       std::string Name = getName({"__omp_threadprivate_init_", ""});
2866       llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
2867           InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
2868       CodeGenFunction InitCGF(CGM);
2869       FunctionArgList ArgList;
2870       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
2871                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
2872                             Loc, Loc);
2873       emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
2874       InitCGF.FinishFunction();
2875       return InitFunction;
2876     }
2877     emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
2878   }
2879   return nullptr;
2880 }
2881 
2882 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
2883                                                      llvm::GlobalVariable *Addr,
2884                                                      bool PerformInit) {
2885   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
2886       !CGM.getLangOpts().OpenMPIsDevice)
2887     return false;
2888   Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
2889       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
2890   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
2891       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
2892        HasRequiresUnifiedSharedMemory))
2893     return CGM.getLangOpts().OpenMPIsDevice;
2894   VD = VD->getDefinition(CGM.getContext());
2895   assert(VD && "Unknown VarDecl");
2896 
2897   if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
2898     return CGM.getLangOpts().OpenMPIsDevice;
2899 
2900   QualType ASTTy = VD->getType();
2901   SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
2902 
2903   // Produce the unique prefix to identify the new target regions. We use
2904   // the source location of the variable declaration which we know to not
2905   // conflict with any target region.
2906   unsigned DeviceID;
2907   unsigned FileID;
2908   unsigned Line;
2909   getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
2910   SmallString<128> Buffer, Out;
2911   {
2912     llvm::raw_svector_ostream OS(Buffer);
2913     OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
2914        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
2915   }
2916 
2917   const Expr *Init = VD->getAnyInitializer();
2918   if (CGM.getLangOpts().CPlusPlus && PerformInit) {
2919     llvm::Constant *Ctor;
2920     llvm::Constant *ID;
2921     if (CGM.getLangOpts().OpenMPIsDevice) {
2922       // Generate function that re-emits the declaration's initializer into
2923       // the threadprivate copy of the variable VD
2924       CodeGenFunction CtorCGF(CGM);
2925 
2926       const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
2927       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2928       llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
2929           FTy, Twine(Buffer, "_ctor"), FI, Loc);
2930       auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
2931       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
2932                             FunctionArgList(), Loc, Loc);
2933       auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
2934       CtorCGF.EmitAnyExprToMem(Init,
2935                                Address(Addr, CGM.getContext().getDeclAlign(VD)),
2936                                Init->getType().getQualifiers(),
2937                                /*IsInitializer=*/true);
2938       CtorCGF.FinishFunction();
2939       Ctor = Fn;
2940       ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
2941       CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
2942     } else {
2943       Ctor = new llvm::GlobalVariable(
2944           CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
2945           llvm::GlobalValue::PrivateLinkage,
2946           llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
2947       ID = Ctor;
2948     }
2949 
2950     // Register the information for the entry associated with the constructor.
2951     Out.clear();
2952     OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
2953         DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
2954         ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
2955   }
2956   if (VD->getType().isDestructedType() != QualType::DK_none) {
2957     llvm::Constant *Dtor;
2958     llvm::Constant *ID;
2959     if (CGM.getLangOpts().OpenMPIsDevice) {
2960       // Generate function that emits destructor call for the threadprivate
2961       // copy of the variable VD
2962       CodeGenFunction DtorCGF(CGM);
2963 
2964       const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
2965       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2966       llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
2967           FTy, Twine(Buffer, "_dtor"), FI, Loc);
2968       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
2969       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
2970                             FunctionArgList(), Loc, Loc);
2971       // Create a scope with an artificial location for the body of this
2972       // function.
2973       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
2974       DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
2975                           ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
2976                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
2977       DtorCGF.FinishFunction();
2978       Dtor = Fn;
2979       ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
2980       CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
2981     } else {
2982       Dtor = new llvm::GlobalVariable(
2983           CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
2984           llvm::GlobalValue::PrivateLinkage,
2985           llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
2986       ID = Dtor;
2987     }
2988     // Register the information for the entry associated with the destructor.
2989     Out.clear();
2990     OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
2991         DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
2992         ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
2993   }
2994   return CGM.getLangOpts().OpenMPIsDevice;
2995 }
2996 
2997 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
2998                                                           QualType VarType,
2999                                                           StringRef Name) {
3000   std::string Suffix = getName({"artificial", ""});
3001   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
3002   llvm::Value *GAddr =
3003       getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
3004   if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
3005       CGM.getTarget().isTLSSupported()) {
3006     cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
3007     return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
3008   }
3009   std::string CacheSuffix = getName({"cache", ""});
3010   llvm::Value *Args[] = {
3011       emitUpdateLocation(CGF, SourceLocation()),
3012       getThreadID(CGF, SourceLocation()),
3013       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
3014       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
3015                                 /*isSigned=*/false),
3016       getOrCreateInternalVariable(
3017           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
3018   return Address(
3019       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3020           CGF.EmitRuntimeCall(
3021               createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
3022           VarLVType->getPointerTo(/*AddrSpace=*/0)),
3023       CGM.getContext().getTypeAlignInChars(VarType));
3024 }
3025 
3026 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
3027                                    const RegionCodeGenTy &ThenGen,
3028                                    const RegionCodeGenTy &ElseGen) {
3029   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
3030 
3031   // If the condition constant folds and can be elided, try to avoid emitting
3032   // the condition and the dead arm of the if/else.
3033   bool CondConstant;
3034   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
3035     if (CondConstant)
3036       ThenGen(CGF);
3037     else
3038       ElseGen(CGF);
3039     return;
3040   }
3041 
3042   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
3043   // emit the conditional branch.
3044   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
3045   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
3046   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
3047   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
3048 
3049   // Emit the 'then' code.
3050   CGF.EmitBlock(ThenBlock);
3051   ThenGen(CGF);
3052   CGF.EmitBranch(ContBlock);
3053   // Emit the 'else' code if present.
3054   // There is no need to emit line number for unconditional branch.
3055   (void)ApplyDebugLocation::CreateEmpty(CGF);
3056   CGF.EmitBlock(ElseBlock);
3057   ElseGen(CGF);
3058   // There is no need to emit line number for unconditional branch.
3059   (void)ApplyDebugLocation::CreateEmpty(CGF);
3060   CGF.EmitBranch(ContBlock);
3061   // Emit the continuation block for code after the if.
3062   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
3063 }
3064 
3065 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
3066                                        llvm::Function *OutlinedFn,
3067                                        ArrayRef<llvm::Value *> CapturedVars,
3068                                        const Expr *IfCond) {
3069   if (!CGF.HaveInsertPoint())
3070     return;
3071   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
3072   auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
3073                                                      PrePostActionTy &) {
3074     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
3075     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
3076     llvm::Value *Args[] = {
3077         RTLoc,
3078         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
3079         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
3080     llvm::SmallVector<llvm::Value *, 16> RealArgs;
3081     RealArgs.append(std::begin(Args), std::end(Args));
3082     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
3083 
3084     llvm::FunctionCallee RTLFn =
3085         RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
3086     CGF.EmitRuntimeCall(RTLFn, RealArgs);
3087   };
3088   auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
3089                                                           PrePostActionTy &) {
3090     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
3091     llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
3092     // Build calls:
3093     // __kmpc_serialized_parallel(&Loc, GTid);
3094     llvm::Value *Args[] = {RTLoc, ThreadID};
3095     CGF.EmitRuntimeCall(
3096         RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
3097 
3098     // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
3099     Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
3100     Address ZeroAddrBound =
3101         CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
3102                                          /*Name=*/".bound.zero.addr");
3103     CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
3104     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
3105     // ThreadId for serialized parallels is 0.
3106     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
3107     OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
3108     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
3109     RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
3110 
3111     // __kmpc_end_serialized_parallel(&Loc, GTid);
3112     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
3113     CGF.EmitRuntimeCall(
3114         RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
3115         EndArgs);
3116   };
3117   if (IfCond) {
3118     emitIfClause(CGF, IfCond, ThenGen, ElseGen);
3119   } else {
3120     RegionCodeGenTy ThenRCG(ThenGen);
3121     ThenRCG(CGF);
3122   }
3123 }
3124 
3125 // If we're inside an (outlined) parallel region, use the region info's
3126 // thread-ID variable (it is passed in a first argument of the outlined function
3127 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
3128 // regular serial code region, get thread ID by calling kmp_int32
3129 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
3130 // return the address of that temp.
3131 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
3132                                              SourceLocation Loc) {
3133   if (auto *OMPRegionInfo =
3134           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3135     if (OMPRegionInfo->getThreadIDVariable())
3136       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
3137 
3138   llvm::Value *ThreadID = getThreadID(CGF, Loc);
3139   QualType Int32Ty =
3140       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
3141   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
3142   CGF.EmitStoreOfScalar(ThreadID,
3143                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
3144 
3145   return ThreadIDTemp;
3146 }
3147 
3148 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
3149     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
3150   SmallString<256> Buffer;
3151   llvm::raw_svector_ostream Out(Buffer);
3152   Out << Name;
3153   StringRef RuntimeName = Out.str();
3154   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
3155   if (Elem.second) {
3156     assert(Elem.second->getType()->getPointerElementType() == Ty &&
3157            "OMP internal variable has different type than requested");
3158     return &*Elem.second;
3159   }
3160 
3161   return Elem.second = new llvm::GlobalVariable(
3162              CGM.getModule(), Ty, /*IsConstant*/ false,
3163              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
3164              Elem.first(), /*InsertBefore=*/nullptr,
3165              llvm::GlobalValue::NotThreadLocal, AddressSpace);
3166 }
3167 
3168 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
3169   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
3170   std::string Name = getName({Prefix, "var"});
3171   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
3172 }
3173 
3174 namespace {
3175 /// Common pre(post)-action for different OpenMP constructs.
3176 class CommonActionTy final : public PrePostActionTy {
3177   llvm::FunctionCallee EnterCallee;
3178   ArrayRef<llvm::Value *> EnterArgs;
3179   llvm::FunctionCallee ExitCallee;
3180   ArrayRef<llvm::Value *> ExitArgs;
3181   bool Conditional;
3182   llvm::BasicBlock *ContBlock = nullptr;
3183 
3184 public:
3185   CommonActionTy(llvm::FunctionCallee EnterCallee,
3186                  ArrayRef<llvm::Value *> EnterArgs,
3187                  llvm::FunctionCallee ExitCallee,
3188                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
3189       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
3190         ExitArgs(ExitArgs), Conditional(Conditional) {}
3191   void Enter(CodeGenFunction &CGF) override {
3192     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
3193     if (Conditional) {
3194       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
3195       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
3196       ContBlock = CGF.createBasicBlock("omp_if.end");
3197       // Generate the branch (If-stmt)
3198       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
3199       CGF.EmitBlock(ThenBlock);
3200     }
3201   }
3202   void Done(CodeGenFunction &CGF) {
3203     // Emit the rest of blocks/branches
3204     CGF.EmitBranch(ContBlock);
3205     CGF.EmitBlock(ContBlock, true);
3206   }
3207   void Exit(CodeGenFunction &CGF) override {
3208     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
3209   }
3210 };
3211 } // anonymous namespace
3212 
3213 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
3214                                          StringRef CriticalName,
3215                                          const RegionCodeGenTy &CriticalOpGen,
3216                                          SourceLocation Loc, const Expr *Hint) {
3217   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
3218   // CriticalOpGen();
3219   // __kmpc_end_critical(ident_t *, gtid, Lock);
3220   // Prepare arguments and build a call to __kmpc_critical
3221   if (!CGF.HaveInsertPoint())
3222     return;
3223   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3224                          getCriticalRegionLock(CriticalName)};
3225   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
3226                                                 std::end(Args));
3227   if (Hint) {
3228     EnterArgs.push_back(CGF.Builder.CreateIntCast(
3229         CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
3230   }
3231   CommonActionTy Action(
3232       createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
3233                                  : OMPRTL__kmpc_critical),
3234       EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
3235   CriticalOpGen.setAction(Action);
3236   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
3237 }
3238 
3239 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
3240                                        const RegionCodeGenTy &MasterOpGen,
3241                                        SourceLocation Loc) {
3242   if (!CGF.HaveInsertPoint())
3243     return;
3244   // if(__kmpc_master(ident_t *, gtid)) {
3245   //   MasterOpGen();
3246   //   __kmpc_end_master(ident_t *, gtid);
3247   // }
3248   // Prepare arguments and build a call to __kmpc_master
3249   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3250   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
3251                         createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
3252                         /*Conditional=*/true);
3253   MasterOpGen.setAction(Action);
3254   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
3255   Action.Done(CGF);
3256 }
3257 
3258 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
3259                                         SourceLocation Loc) {
3260   if (!CGF.HaveInsertPoint())
3261     return;
3262   llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
3263   if (OMPBuilder) {
3264     OMPBuilder->CreateTaskyield(CGF.Builder);
3265   } else {
3266     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
3267     llvm::Value *Args[] = {
3268         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3269         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
3270     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield),
3271                         Args);
3272   }
3273 
3274   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3275     Region->emitUntiedSwitch(CGF);
3276 }
3277 
3278 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
3279                                           const RegionCodeGenTy &TaskgroupOpGen,
3280                                           SourceLocation Loc) {
3281   if (!CGF.HaveInsertPoint())
3282     return;
3283   // __kmpc_taskgroup(ident_t *, gtid);
3284   // TaskgroupOpGen();
3285   // __kmpc_end_taskgroup(ident_t *, gtid);
3286   // Prepare arguments and build a call to __kmpc_taskgroup
3287   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3288   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
3289                         createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
3290                         Args);
3291   TaskgroupOpGen.setAction(Action);
3292   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
3293 }
3294 
3295 /// Given an array of pointers to variables, project the address of a
3296 /// given variable.
3297 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
3298                                       unsigned Index, const VarDecl *Var) {
3299   // Pull out the pointer to the variable.
3300   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
3301   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
3302 
3303   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
3304   Addr = CGF.Builder.CreateElementBitCast(
3305       Addr, CGF.ConvertTypeForMem(Var->getType()));
3306   return Addr;
3307 }
3308 
3309 static llvm::Value *emitCopyprivateCopyFunction(
3310     CodeGenModule &CGM, llvm::Type *ArgsType,
3311     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
3312     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
3313     SourceLocation Loc) {
3314   ASTContext &C = CGM.getContext();
3315   // void copy_func(void *LHSArg, void *RHSArg);
3316   FunctionArgList Args;
3317   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
3318                            ImplicitParamDecl::Other);
3319   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
3320                            ImplicitParamDecl::Other);
3321   Args.push_back(&LHSArg);
3322   Args.push_back(&RHSArg);
3323   const auto &CGFI =
3324       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3325   std::string Name =
3326       CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
3327   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
3328                                     llvm::GlobalValue::InternalLinkage, Name,
3329                                     &CGM.getModule());
3330   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
3331   Fn->setDoesNotRecurse();
3332   CodeGenFunction CGF(CGM);
3333   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
3334   // Dest = (void*[n])(LHSArg);
3335   // Src = (void*[n])(RHSArg);
3336   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3337       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
3338       ArgsType), CGF.getPointerAlign());
3339   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3340       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
3341       ArgsType), CGF.getPointerAlign());
3342   // *(Type0*)Dst[0] = *(Type0*)Src[0];
3343   // *(Type1*)Dst[1] = *(Type1*)Src[1];
3344   // ...
3345   // *(Typen*)Dst[n] = *(Typen*)Src[n];
3346   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
3347     const auto *DestVar =
3348         cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
3349     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
3350 
3351     const auto *SrcVar =
3352         cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
3353     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
3354 
3355     const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
3356     QualType Type = VD->getType();
3357     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
3358   }
3359   CGF.FinishFunction();
3360   return Fn;
3361 }
3362 
/// Emits an OpenMP 'single' region guarded by __kmpc_single /
/// __kmpc_end_single and, when \p CopyprivateVars is non-empty, the trailing
/// __kmpc_copyprivate call that distributes the listed variables.
///
/// \param SingleOpGen Generator for the body of the region.
/// \param CopyprivateVars Variables named in the copyprivate clause; when
///        empty no 'did_it' flag and no __kmpc_copyprivate call are emitted.
/// \param SrcExprs, DstExprs, AssignmentOps Per-variable helper expressions;
///        all three must have the same length as \p CopyprivateVars.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  // The four lists are parallel: element I of each describes the same variable.
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // DidIt stays invalid when there is nothing to copy; its validity is used
  // below as the "copyprivate present" flag.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  // NOTE(review): presumably Done() closes the conditional block opened by the
  // action, so the 'did_it = 1' store above stays inside the 'if' shown in the
  // pseudo-code -- confirm against CommonActionTy.
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    // The list is an array of 'void *' with one slot per copyprivate variable.
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      // Store the address of the I-th variable, cast to 'void *', in slot I.
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    // Note that SrcExprs and DstExprs are passed in this order and therefore
    // bind to the helper's DestExprs and SrcExprs parameters respectively.
    // NOTE(review): the naming looks swapped; confirm it matches what callers
    // of this function pass before changing either side.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    // This inner Args intentionally shadows the two-element Args used for
    // __kmpc_single above.
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
  }
}
3444 
3445 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
3446                                         const RegionCodeGenTy &OrderedOpGen,
3447                                         SourceLocation Loc, bool IsThreads) {
3448   if (!CGF.HaveInsertPoint())
3449     return;
3450   // __kmpc_ordered(ident_t *, gtid);
3451   // OrderedOpGen();
3452   // __kmpc_end_ordered(ident_t *, gtid);
3453   // Prepare arguments and build a call to __kmpc_ordered
3454   if (IsThreads) {
3455     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3456     CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
3457                           createRuntimeFunction(OMPRTL__kmpc_end_ordered),
3458                           Args);
3459     OrderedOpGen.setAction(Action);
3460     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3461     return;
3462   }
3463   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3464 }
3465 
3466 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
3467   unsigned Flags;
3468   if (Kind == OMPD_for)
3469     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
3470   else if (Kind == OMPD_sections)
3471     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
3472   else if (Kind == OMPD_single)
3473     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
3474   else if (Kind == OMPD_barrier)
3475     Flags = OMP_IDENT_BARRIER_EXPL;
3476   else
3477     Flags = OMP_IDENT_BARRIER_IMPL;
3478   return Flags;
3479 }
3480 
3481 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
3482     CodeGenFunction &CGF, const OMPLoopDirective &S,
3483     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
3484   // Check if the loop directive is actually a doacross loop directive. In this
3485   // case choose static, 1 schedule.
3486   if (llvm::any_of(
3487           S.getClausesOfKind<OMPOrderedClause>(),
3488           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
3489     ScheduleKind = OMPC_SCHEDULE_static;
3490     // Chunk size is 1 in this case.
3491     llvm::APInt ChunkSize(32, 1);
3492     ChunkExpr = IntegerLiteral::Create(
3493         CGF.getContext(), ChunkSize,
3494         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
3495         SourceLocation());
3496   }
3497 }
3498 
3499 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
3500                                       OpenMPDirectiveKind Kind, bool EmitChecks,
3501                                       bool ForceSimpleCall) {
3502   // Check if we should use the OMPBuilder
3503   auto *OMPRegionInfo =
3504       dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
3505   llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
3506   if (OMPBuilder) {
3507     CGF.Builder.restoreIP(OMPBuilder->CreateBarrier(
3508         CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
3509     return;
3510   }
3511 
3512   if (!CGF.HaveInsertPoint())
3513     return;
3514   // Build call __kmpc_cancel_barrier(loc, thread_id);
3515   // Build call __kmpc_barrier(loc, thread_id);
3516   unsigned Flags = getDefaultFlagsForBarriers(Kind);
3517   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
3518   // thread_id);
3519   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
3520                          getThreadID(CGF, Loc)};
3521   if (OMPRegionInfo) {
3522     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
3523       llvm::Value *Result = CGF.EmitRuntimeCall(
3524           createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
3525       if (EmitChecks) {
3526         // if (__kmpc_cancel_barrier()) {
3527         //   exit from construct;
3528         // }
3529         llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
3530         llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
3531         llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
3532         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
3533         CGF.EmitBlock(ExitBB);
3534         //   exit from construct;
3535         CodeGenFunction::JumpDest CancelDestination =
3536             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
3537         CGF.EmitBranchThroughCleanup(CancelDestination);
3538         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
3539       }
3540       return;
3541     }
3542   }
3543   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
3544 }
3545 
3546 /// Map the OpenMP loop schedule to the runtime enumeration.
3547 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
3548                                           bool Chunked, bool Ordered) {
3549   switch (ScheduleKind) {
3550   case OMPC_SCHEDULE_static:
3551     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
3552                    : (Ordered ? OMP_ord_static : OMP_sch_static);
3553   case OMPC_SCHEDULE_dynamic:
3554     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
3555   case OMPC_SCHEDULE_guided:
3556     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
3557   case OMPC_SCHEDULE_runtime:
3558     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
3559   case OMPC_SCHEDULE_auto:
3560     return Ordered ? OMP_ord_auto : OMP_sch_auto;
3561   case OMPC_SCHEDULE_unknown:
3562     assert(!Chunked && "chunk was specified but schedule kind not known");
3563     return Ordered ? OMP_ord_static : OMP_sch_static;
3564   }
3565   llvm_unreachable("Unexpected runtime schedule");
3566 }
3567 
3568 /// Map the OpenMP distribute schedule to the runtime enumeration.
3569 static OpenMPSchedType
3570 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
3571   // only static is allowed for dist_schedule
3572   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
3573 }
3574 
3575 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
3576                                          bool Chunked) const {
3577   OpenMPSchedType Schedule =
3578       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3579   return Schedule == OMP_sch_static;
3580 }
3581 
3582 bool CGOpenMPRuntime::isStaticNonchunked(
3583     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3584   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3585   return Schedule == OMP_dist_sch_static;
3586 }
3587 
3588 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
3589                                       bool Chunked) const {
3590   OpenMPSchedType Schedule =
3591       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3592   return Schedule == OMP_sch_static_chunked;
3593 }
3594 
3595 bool CGOpenMPRuntime::isStaticChunked(
3596     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3597   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3598   return Schedule == OMP_dist_sch_static_chunked;
3599 }
3600 
3601 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
3602   OpenMPSchedType Schedule =
3603       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
3604   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
3605   return Schedule != OMP_sch_static;
3606 }
3607 
3608 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
3609                                   OpenMPScheduleClauseModifier M1,
3610                                   OpenMPScheduleClauseModifier M2) {
3611   int Modifier = 0;
3612   switch (M1) {
3613   case OMPC_SCHEDULE_MODIFIER_monotonic:
3614     Modifier = OMP_sch_modifier_monotonic;
3615     break;
3616   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3617     Modifier = OMP_sch_modifier_nonmonotonic;
3618     break;
3619   case OMPC_SCHEDULE_MODIFIER_simd:
3620     if (Schedule == OMP_sch_static_chunked)
3621       Schedule = OMP_sch_static_balanced_chunked;
3622     break;
3623   case OMPC_SCHEDULE_MODIFIER_last:
3624   case OMPC_SCHEDULE_MODIFIER_unknown:
3625     break;
3626   }
3627   switch (M2) {
3628   case OMPC_SCHEDULE_MODIFIER_monotonic:
3629     Modifier = OMP_sch_modifier_monotonic;
3630     break;
3631   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3632     Modifier = OMP_sch_modifier_nonmonotonic;
3633     break;
3634   case OMPC_SCHEDULE_MODIFIER_simd:
3635     if (Schedule == OMP_sch_static_chunked)
3636       Schedule = OMP_sch_static_balanced_chunked;
3637     break;
3638   case OMPC_SCHEDULE_MODIFIER_last:
3639   case OMPC_SCHEDULE_MODIFIER_unknown:
3640     break;
3641   }
3642   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
3643   // If the static schedule kind is specified or if the ordered clause is
3644   // specified, and if the nonmonotonic modifier is not specified, the effect is
3645   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
3646   // modifier is specified, the effect is as if the nonmonotonic modifier is
3647   // specified.
3648   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
3649     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
3650           Schedule == OMP_sch_static_balanced_chunked ||
3651           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
3652           Schedule == OMP_dist_sch_static_chunked ||
3653           Schedule == OMP_dist_sch_static))
3654       Modifier = OMP_sch_modifier_nonmonotonic;
3655   }
3656   return Schedule | Modifier;
3657 }
3658 
3659 void CGOpenMPRuntime::emitForDispatchInit(
3660     CodeGenFunction &CGF, SourceLocation Loc,
3661     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
3662     bool Ordered, const DispatchRTInput &DispatchValues) {
3663   if (!CGF.HaveInsertPoint())
3664     return;
3665   OpenMPSchedType Schedule = getRuntimeSchedule(
3666       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
3667   assert(Ordered ||
3668          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
3669           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
3670           Schedule != OMP_sch_static_balanced_chunked));
3671   // Call __kmpc_dispatch_init(
3672   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
3673   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
3674   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
3675 
3676   // If the Chunk was not specified in the clause - use default value 1.
3677   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
3678                                             : CGF.Builder.getIntN(IVSize, 1);
3679   llvm::Value *Args[] = {
3680       emitUpdateLocation(CGF, Loc),
3681       getThreadID(CGF, Loc),
3682       CGF.Builder.getInt32(addMonoNonMonoModifier(
3683           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
3684       DispatchValues.LB,                                     // Lower
3685       DispatchValues.UB,                                     // Upper
3686       CGF.Builder.getIntN(IVSize, 1),                        // Stride
3687       Chunk                                                  // Chunk
3688   };
3689   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
3690 }
3691 
3692 static void emitForStaticInitCall(
3693     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
3694     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
3695     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
3696     const CGOpenMPRuntime::StaticRTInput &Values) {
3697   if (!CGF.HaveInsertPoint())
3698     return;
3699 
3700   assert(!Values.Ordered);
3701   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
3702          Schedule == OMP_sch_static_balanced_chunked ||
3703          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
3704          Schedule == OMP_dist_sch_static ||
3705          Schedule == OMP_dist_sch_static_chunked);
3706 
3707   // Call __kmpc_for_static_init(
3708   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
3709   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
3710   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
3711   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
3712   llvm::Value *Chunk = Values.Chunk;
3713   if (Chunk == nullptr) {
3714     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
3715             Schedule == OMP_dist_sch_static) &&
3716            "expected static non-chunked schedule");
3717     // If the Chunk was not specified in the clause - use default value 1.
3718     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
3719   } else {
3720     assert((Schedule == OMP_sch_static_chunked ||
3721             Schedule == OMP_sch_static_balanced_chunked ||
3722             Schedule == OMP_ord_static_chunked ||
3723             Schedule == OMP_dist_sch_static_chunked) &&
3724            "expected static chunked schedule");
3725   }
3726   llvm::Value *Args[] = {
3727       UpdateLocation,
3728       ThreadId,
3729       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
3730                                                   M2)), // Schedule type
3731       Values.IL.getPointer(),                           // &isLastIter
3732       Values.LB.getPointer(),                           // &LB
3733       Values.UB.getPointer(),                           // &UB
3734       Values.ST.getPointer(),                           // &Stride
3735       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
3736       Chunk                                             // Chunk
3737   };
3738   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
3739 }
3740 
3741 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
3742                                         SourceLocation Loc,
3743                                         OpenMPDirectiveKind DKind,
3744                                         const OpenMPScheduleTy &ScheduleKind,
3745                                         const StaticRTInput &Values) {
3746   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
3747       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
3748   assert(isOpenMPWorksharingDirective(DKind) &&
3749          "Expected loop-based or sections-based directive.");
3750   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
3751                                              isOpenMPLoopDirective(DKind)
3752                                                  ? OMP_IDENT_WORK_LOOP
3753                                                  : OMP_IDENT_WORK_SECTIONS);
3754   llvm::Value *ThreadId = getThreadID(CGF, Loc);
3755   llvm::FunctionCallee StaticInitFunction =
3756       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3757   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
3758   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3759                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
3760 }
3761 
3762 void CGOpenMPRuntime::emitDistributeStaticInit(
3763     CodeGenFunction &CGF, SourceLocation Loc,
3764     OpenMPDistScheduleClauseKind SchedKind,
3765     const CGOpenMPRuntime::StaticRTInput &Values) {
3766   OpenMPSchedType ScheduleNum =
3767       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
3768   llvm::Value *UpdatedLocation =
3769       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
3770   llvm::Value *ThreadId = getThreadID(CGF, Loc);
3771   llvm::FunctionCallee StaticInitFunction =
3772       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3773   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3774                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
3775                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
3776 }
3777 
3778 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
3779                                           SourceLocation Loc,
3780                                           OpenMPDirectiveKind DKind) {
3781   if (!CGF.HaveInsertPoint())
3782     return;
3783   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
3784   llvm::Value *Args[] = {
3785       emitUpdateLocation(CGF, Loc,
3786                          isOpenMPDistributeDirective(DKind)
3787                              ? OMP_IDENT_WORK_DISTRIBUTE
3788                              : isOpenMPLoopDirective(DKind)
3789                                    ? OMP_IDENT_WORK_LOOP
3790                                    : OMP_IDENT_WORK_SECTIONS),
3791       getThreadID(CGF, Loc)};
3792   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
3793   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
3794                       Args);
3795 }
3796 
3797 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
3798                                                  SourceLocation Loc,
3799                                                  unsigned IVSize,
3800                                                  bool IVSigned) {
3801   if (!CGF.HaveInsertPoint())
3802     return;
3803   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
3804   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3805   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
3806 }
3807 
3808 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
3809                                           SourceLocation Loc, unsigned IVSize,
3810                                           bool IVSigned, Address IL,
3811                                           Address LB, Address UB,
3812                                           Address ST) {
3813   // Call __kmpc_dispatch_next(
3814   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
3815   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
3816   //          kmp_int[32|64] *p_stride);
3817   llvm::Value *Args[] = {
3818       emitUpdateLocation(CGF, Loc),
3819       getThreadID(CGF, Loc),
3820       IL.getPointer(), // &isLastIter
3821       LB.getPointer(), // &Lower
3822       UB.getPointer(), // &Upper
3823       ST.getPointer()  // &Stride
3824   };
3825   llvm::Value *Call =
3826       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
3827   return CGF.EmitScalarConversion(
3828       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
3829       CGF.getContext().BoolTy, Loc);
3830 }
3831 
3832 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
3833                                            llvm::Value *NumThreads,
3834                                            SourceLocation Loc) {
3835   if (!CGF.HaveInsertPoint())
3836     return;
3837   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
3838   llvm::Value *Args[] = {
3839       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3840       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
3841   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
3842                       Args);
3843 }
3844 
3845 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
3846                                          ProcBindKind ProcBind,
3847                                          SourceLocation Loc) {
3848   if (!CGF.HaveInsertPoint())
3849     return;
3850   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
3851   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
3852   llvm::Value *Args[] = {
3853       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3854       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
3855   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
3856 }
3857 
3858 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
3859                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
3860   llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
3861   if (OMPBuilder) {
3862     OMPBuilder->CreateFlush(CGF.Builder);
3863   } else {
3864     if (!CGF.HaveInsertPoint())
3865       return;
3866     // Build call void __kmpc_flush(ident_t *loc)
3867     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
3868                         emitUpdateLocation(CGF, Loc));
3869   }
3870 }
3871 
namespace {
/// Indexes of fields for type kmp_task_t.
///
/// The enumerators carry no explicit values, so they are numbered from 0 in
/// declaration order; inserting or reordering entries changes every following
/// index.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
3897 
3898 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3899   return OffloadEntriesTargetRegion.empty() &&
3900          OffloadEntriesDeviceGlobalVar.empty();
3901 }
3902 
3903 /// Initialize target region entry.
3904 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3905     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3906                                     StringRef ParentName, unsigned LineNum,
3907                                     unsigned Order) {
3908   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3909                                              "only required for the device "
3910                                              "code generation.");
3911   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3912       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3913                                    OMPTargetRegionEntryTargetRegion);
3914   ++OffloadingEntriesNum;
3915 }
3916 
3917 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3918     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3919                                   StringRef ParentName, unsigned LineNum,
3920                                   llvm::Constant *Addr, llvm::Constant *ID,
3921                                   OMPTargetRegionEntryKind Flags) {
3922   // If we are emitting code for a target, the entry is already initialized,
3923   // only has to be registered.
3924   if (CGM.getLangOpts().OpenMPIsDevice) {
3925     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
3926       unsigned DiagID = CGM.getDiags().getCustomDiagID(
3927           DiagnosticsEngine::Error,
3928           "Unable to find target region on line '%0' in the device code.");
3929       CGM.getDiags().Report(DiagID) << LineNum;
3930       return;
3931     }
3932     auto &Entry =
3933         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3934     assert(Entry.isValid() && "Entry not initialized!");
3935     Entry.setAddress(Addr);
3936     Entry.setID(ID);
3937     Entry.setFlags(Flags);
3938   } else {
3939     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3940     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3941     ++OffloadingEntriesNum;
3942   }
3943 }
3944 
3945 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3946     unsigned DeviceID, unsigned FileID, StringRef ParentName,
3947     unsigned LineNum) const {
3948   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3949   if (PerDevice == OffloadEntriesTargetRegion.end())
3950     return false;
3951   auto PerFile = PerDevice->second.find(FileID);
3952   if (PerFile == PerDevice->second.end())
3953     return false;
3954   auto PerParentName = PerFile->second.find(ParentName);
3955   if (PerParentName == PerFile->second.end())
3956     return false;
3957   auto PerLine = PerParentName->second.find(LineNum);
3958   if (PerLine == PerParentName->second.end())
3959     return false;
3960   // Fail if this entry is already registered.
3961   if (PerLine->second.getAddress() || PerLine->second.getID())
3962     return false;
3963   return true;
3964 }
3965 
3966 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3967     const OffloadTargetRegionEntryInfoActTy &Action) {
3968   // Scan all target region entries and perform the provided action.
3969   for (const auto &D : OffloadEntriesTargetRegion)
3970     for (const auto &F : D.second)
3971       for (const auto &P : F.second)
3972         for (const auto &L : P.second)
3973           Action(D.first, F.first, P.first(), L.first, L.second);
3974 }
3975 
3976 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3977     initializeDeviceGlobalVarEntryInfo(StringRef Name,
3978                                        OMPTargetGlobalVarEntryKind Flags,
3979                                        unsigned Order) {
3980   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3981                                              "only required for the device "
3982                                              "code generation.");
3983   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3984   ++OffloadingEntriesNum;
3985 }
3986 
3987 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3988     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3989                                      CharUnits VarSize,
3990                                      OMPTargetGlobalVarEntryKind Flags,
3991                                      llvm::GlobalValue::LinkageTypes Linkage) {
3992   if (CGM.getLangOpts().OpenMPIsDevice) {
3993     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3994     assert(Entry.isValid() && Entry.getFlags() == Flags &&
3995            "Entry not initialized!");
3996     assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3997            "Resetting with the new address.");
3998     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3999       if (Entry.getVarSize().isZero()) {
4000         Entry.setVarSize(VarSize);
4001         Entry.setLinkage(Linkage);
4002       }
4003       return;
4004     }
4005     Entry.setVarSize(VarSize);
4006     Entry.setLinkage(Linkage);
4007     Entry.setAddress(Addr);
4008   } else {
4009     if (hasDeviceGlobalVarEntryInfo(VarName)) {
4010       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
4011       assert(Entry.isValid() && Entry.getFlags() == Flags &&
4012              "Entry not initialized!");
4013       assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
4014              "Resetting with the new address.");
4015       if (Entry.getVarSize().isZero()) {
4016         Entry.setVarSize(VarSize);
4017         Entry.setLinkage(Linkage);
4018       }
4019       return;
4020     }
4021     OffloadEntriesDeviceGlobalVar.try_emplace(
4022         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
4023     ++OffloadingEntriesNum;
4024   }
4025 }
4026 
4027 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
4028     actOnDeviceGlobalVarEntriesInfo(
4029         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
4030   // Scan all target region entries and perform the provided action.
4031   for (const auto &E : OffloadEntriesDeviceGlobalVar)
4032     Action(E.getKey(), E.getValue());
4033 }
4034 
4035 void CGOpenMPRuntime::createOffloadEntry(
4036     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
4037     llvm::GlobalValue::LinkageTypes Linkage) {
4038   StringRef Name = Addr->getName();
4039   llvm::Module &M = CGM.getModule();
4040   llvm::LLVMContext &C = M.getContext();
4041 
4042   // Create constant string with the name.
4043   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
4044 
4045   std::string StringName = getName({"omp_offloading", "entry_name"});
4046   auto *Str = new llvm::GlobalVariable(
4047       M, StrPtrInit->getType(), /*isConstant=*/true,
4048       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
4049   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
4050 
4051   llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
4052                             llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
4053                             llvm::ConstantInt::get(CGM.SizeTy, Size),
4054                             llvm::ConstantInt::get(CGM.Int32Ty, Flags),
4055                             llvm::ConstantInt::get(CGM.Int32Ty, 0)};
4056   std::string EntryName = getName({"omp_offloading", "entry", ""});
4057   llvm::GlobalVariable *Entry = createGlobalStruct(
4058       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
4059       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
4060 
4061   // The entry has to be created in the section the linker expects it to be.
4062   Entry->setSection("omp_offloading_entries");
4063 }
4064 
/// Emit the offload entries of this module together with the
/// "omp_offload.info" named metadata that describes them.
///
/// Nothing is emitted in simd mode or when the offload entries manager is
/// empty. Otherwise one metadata node is added per target region entry and per
/// device global variable entry, and afterwards the entries are visited in
/// creation order and each one is either diagnosed, skipped, or turned into an
/// offload entry through createOffloadEntry().
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for functions that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // One slot per entry, indexed by the entry's order. Each slot holds the
  // entry, a source location used for diagnostics, and a name (the parent
  // function name for target regions, the mangled name for global variables).
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  // Parent function name of each target region entry, indexed by order.
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Look for the file whose unique ID matches the device/file IDs and
        // translate the line into a source location; Loc stays
        // default-constructed if no file matches.
        // Note: the loop's end iterator is also named 'E' and shadows the
        // entry parameter inside the loop only.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Walk the entries in creation order and emit the actual offload entries.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      // Target regions are emitted with size 0.
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        // No entry is emitted on the device when unified shared memory is
        // required.
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        // The address is expected to be unset on the device and set on the
        // host.
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        // Link entries are only emitted on the host.
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      // For global variables the address doubles as the entry ID.
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
4238 
4239 /// Loads all the offload entries information from the host IR
4240 /// metadata.
4241 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
4242   // If we are in target mode, load the metadata from the host IR. This code has
4243   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
4244 
4245   if (!CGM.getLangOpts().OpenMPIsDevice)
4246     return;
4247 
4248   if (CGM.getLangOpts().OMPHostIRFile.empty())
4249     return;
4250 
4251   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
4252   if (auto EC = Buf.getError()) {
4253     CGM.getDiags().Report(diag::err_cannot_open_file)
4254         << CGM.getLangOpts().OMPHostIRFile << EC.message();
4255     return;
4256   }
4257 
4258   llvm::LLVMContext C;
4259   auto ME = expectedToErrorOrAndEmitErrors(
4260       C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
4261 
4262   if (auto EC = ME.getError()) {
4263     unsigned DiagID = CGM.getDiags().getCustomDiagID(
4264         DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
4265     CGM.getDiags().Report(DiagID)
4266         << CGM.getLangOpts().OMPHostIRFile << EC.message();
4267     return;
4268   }
4269 
4270   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
4271   if (!MD)
4272     return;
4273 
4274   for (llvm::MDNode *MN : MD->operands()) {
4275     auto &&GetMDInt = [MN](unsigned Idx) {
4276       auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
4277       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
4278     };
4279 
4280     auto &&GetMDString = [MN](unsigned Idx) {
4281       auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
4282       return V->getString();
4283     };
4284 
4285     switch (GetMDInt(0)) {
4286     default:
4287       llvm_unreachable("Unexpected metadata!");
4288       break;
4289     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
4290         OffloadingEntryInfoTargetRegion:
4291       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
4292           /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
4293           /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
4294           /*Order=*/GetMDInt(5));
4295       break;
4296     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
4297         OffloadingEntryInfoDeviceGlobalVar:
4298       OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
4299           /*MangledName=*/GetMDString(1),
4300           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
4301               /*Flags=*/GetMDInt(2)),
4302           /*Order=*/GetMDInt(3));
4303       break;
4304     }
4305   }
4306 }
4307 
4308 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
4309   if (!KmpRoutineEntryPtrTy) {
4310     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
4311     ASTContext &C = CGM.getContext();
4312     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
4313     FunctionProtoType::ExtProtoInfo EPI;
4314     KmpRoutineEntryPtrQTy = C.getPointerType(
4315         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
4316     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
4317   }
4318 }
4319 
4320 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
4321   // Make sure the type of the entry is already created. This is the type we
4322   // have to create:
4323   // struct __tgt_offload_entry{
4324   //   void      *addr;       // Pointer to the offload entry info.
4325   //                          // (function or global)
4326   //   char      *name;       // Name of the function or global.
4327   //   size_t     size;       // Size of the entry info (0 if it a function).
4328   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
4329   //   int32_t    reserved;   // Reserved, to use by the runtime library.
4330   // };
4331   if (TgtOffloadEntryQTy.isNull()) {
4332     ASTContext &C = CGM.getContext();
4333     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
4334     RD->startDefinition();
4335     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4336     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
4337     addFieldToRecordDecl(C, RD, C.getSizeType());
4338     addFieldToRecordDecl(
4339         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4340     addFieldToRecordDecl(
4341         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4342     RD->completeDefinition();
4343     RD->addAttr(PackedAttr::CreateImplicit(C));
4344     TgtOffloadEntryQTy = C.getRecordType(RD);
4345   }
4346   return TgtOffloadEntryQTy;
4347 }
4348 
4349 namespace {
4350 struct PrivateHelpersTy {
4351   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
4352                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
4353       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
4354         PrivateElemInit(PrivateElemInit) {}
4355   const Expr *OriginalRef = nullptr;
4356   const VarDecl *Original = nullptr;
4357   const VarDecl *PrivateCopy = nullptr;
4358   const VarDecl *PrivateElemInit = nullptr;
4359 };
4360 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
4361 } // anonymous namespace
4362 
4363 static RecordDecl *
4364 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
4365   if (!Privates.empty()) {
4366     ASTContext &C = CGM.getContext();
4367     // Build struct .kmp_privates_t. {
4368     //         /*  private vars  */
4369     //       };
4370     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
4371     RD->startDefinition();
4372     for (const auto &Pair : Privates) {
4373       const VarDecl *VD = Pair.second.Original;
4374       QualType Type = VD->getType().getNonReferenceType();
4375       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
4376       if (VD->hasAttrs()) {
4377         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
4378              E(VD->getAttrs().end());
4379              I != E; ++I)
4380           FD->addAttr(*I);
4381       }
4382     }
4383     RD->completeDefinition();
4384     return RD;
4385   }
4386   return nullptr;
4387 }
4388 
4389 static RecordDecl *
4390 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
4391                          QualType KmpInt32Ty,
4392                          QualType KmpRoutineEntryPointerQTy) {
4393   ASTContext &C = CGM.getContext();
4394   // Build struct kmp_task_t {
4395   //         void *              shareds;
4396   //         kmp_routine_entry_t routine;
4397   //         kmp_int32           part_id;
4398   //         kmp_cmplrdata_t data1;
4399   //         kmp_cmplrdata_t data2;
4400   // For taskloops additional fields:
4401   //         kmp_uint64          lb;
4402   //         kmp_uint64          ub;
4403   //         kmp_int64           st;
4404   //         kmp_int32           liter;
4405   //         void *              reductions;
4406   //       };
4407   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
4408   UD->startDefinition();
4409   addFieldToRecordDecl(C, UD, KmpInt32Ty);
4410   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
4411   UD->completeDefinition();
4412   QualType KmpCmplrdataTy = C.getRecordType(UD);
4413   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
4414   RD->startDefinition();
4415   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4416   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
4417   addFieldToRecordDecl(C, RD, KmpInt32Ty);
4418   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4419   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4420   if (isOpenMPTaskLoopDirective(Kind)) {
4421     QualType KmpUInt64Ty =
4422         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
4423     QualType KmpInt64Ty =
4424         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
4425     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4426     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4427     addFieldToRecordDecl(C, RD, KmpInt64Ty);
4428     addFieldToRecordDecl(C, RD, KmpInt32Ty);
4429     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4430   }
4431   RD->completeDefinition();
4432   return RD;
4433 }
4434 
4435 static RecordDecl *
4436 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
4437                                      ArrayRef<PrivateDataTy> Privates) {
4438   ASTContext &C = CGM.getContext();
4439   // Build struct kmp_task_t_with_privates {
4440   //         kmp_task_t task_data;
4441   //         .kmp_privates_t. privates;
4442   //       };
4443   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
4444   RD->startDefinition();
4445   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
4446   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
4447     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
4448   RD->completeDefinition();
4449   return RD;
4450 }
4451 
4452 /// Emit a proxy function which accepts kmp_task_t as the second
4453 /// argument.
4454 /// \code
4455 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
4456 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
4457 ///   For taskloops:
4458 ///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
4459 ///   tt->reductions, tt->shareds);
4460 ///   return 0;
4461 /// }
4462 /// \endcode
4463 static llvm::Function *
4464 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
4465                       OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
4466                       QualType KmpTaskTWithPrivatesPtrQTy,
4467                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
4468                       QualType SharedsPtrTy, llvm::Function *TaskFunction,
4469                       llvm::Value *TaskPrivatesMap) {
4470   ASTContext &C = CGM.getContext();
4471   FunctionArgList Args;
4472   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
4473                             ImplicitParamDecl::Other);
4474   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4475                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
4476                                 ImplicitParamDecl::Other);
4477   Args.push_back(&GtidArg);
4478   Args.push_back(&TaskTypeArg);
4479   const auto &TaskEntryFnInfo =
4480       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
4481   llvm::FunctionType *TaskEntryTy =
4482       CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
4483   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
4484   auto *TaskEntry = llvm::Function::Create(
4485       TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4486   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
4487   TaskEntry->setDoesNotRecurse();
4488   CodeGenFunction CGF(CGM);
4489   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
4490                     Loc, Loc);
4491 
4492   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
4493   // tt,
4494   // For taskloops:
4495   // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
4496   // tt->task_data.shareds);
4497   llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
4498       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
4499   LValue TDBase = CGF.EmitLoadOfPointerLValue(
4500       CGF.GetAddrOfLocalVar(&TaskTypeArg),
4501       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4502   const auto *KmpTaskTWithPrivatesQTyRD =
4503       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
4504   LValue Base =
4505       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4506   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
4507   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4508   LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
4509   llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
4510 
4511   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
4512   LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
4513   llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4514       CGF.EmitLoadOfScalar(SharedsLVal, Loc),
4515       CGF.ConvertTypeForMem(SharedsPtrTy));
4516 
4517   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4518   llvm::Value *PrivatesParam;
4519   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
4520     LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
4521     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4522         PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
4523   } else {
4524     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4525   }
4526 
4527   llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
4528                                TaskPrivatesMap,
4529                                CGF.Builder
4530                                    .CreatePointerBitCastOrAddrSpaceCast(
4531                                        TDBase.getAddress(CGF), CGF.VoidPtrTy)
4532                                    .getPointer()};
4533   SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
4534                                           std::end(CommonArgs));
4535   if (isOpenMPTaskLoopDirective(Kind)) {
4536     auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
4537     LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
4538     llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
4539     auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
4540     LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
4541     llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
4542     auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
4543     LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
4544     llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
4545     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4546     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4547     llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
4548     auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
4549     LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
4550     llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
4551     CallArgs.push_back(LBParam);
4552     CallArgs.push_back(UBParam);
4553     CallArgs.push_back(StParam);
4554     CallArgs.push_back(LIParam);
4555     CallArgs.push_back(RParam);
4556   }
4557   CallArgs.push_back(SharedsParam);
4558 
4559   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
4560                                                   CallArgs);
4561   CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
4562                              CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
4563   CGF.FinishFunction();
4564   return TaskEntry;
4565 }
4566 
4567 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
4568                                             SourceLocation Loc,
4569                                             QualType KmpInt32Ty,
4570                                             QualType KmpTaskTWithPrivatesPtrQTy,
4571                                             QualType KmpTaskTWithPrivatesQTy) {
4572   ASTContext &C = CGM.getContext();
4573   FunctionArgList Args;
4574   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
4575                             ImplicitParamDecl::Other);
4576   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4577                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
4578                                 ImplicitParamDecl::Other);
4579   Args.push_back(&GtidArg);
4580   Args.push_back(&TaskTypeArg);
4581   const auto &DestructorFnInfo =
4582       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
4583   llvm::FunctionType *DestructorFnTy =
4584       CGM.getTypes().GetFunctionType(DestructorFnInfo);
4585   std::string Name =
4586       CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
4587   auto *DestructorFn =
4588       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
4589                              Name, &CGM.getModule());
4590   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
4591                                     DestructorFnInfo);
4592   DestructorFn->setDoesNotRecurse();
4593   CodeGenFunction CGF(CGM);
4594   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
4595                     Args, Loc, Loc);
4596 
4597   LValue Base = CGF.EmitLoadOfPointerLValue(
4598       CGF.GetAddrOfLocalVar(&TaskTypeArg),
4599       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4600   const auto *KmpTaskTWithPrivatesQTyRD =
4601       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
4602   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4603   Base = CGF.EmitLValueForField(Base, *FI);
4604   for (const auto *Field :
4605        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
4606     if (QualType::DestructionKind DtorKind =
4607             Field->getType().isDestructedType()) {
4608       LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
4609       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
4610     }
4611   }
4612   CGF.FinishFunction();
4613   return DestructorFn;
4614 }
4615 
4616 /// Emit a privates mapping function for correct handling of private and
4617 /// firstprivate variables.
4618 /// \code
4619 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
4620 /// **noalias priv1,...,  <tyn> **noalias privn) {
4621 ///   *priv1 = &.privates.priv1;
4622 ///   ...;
4623 ///   *privn = &.privates.privn;
4624 /// }
4625 /// \endcode
4626 static llvm::Value *
4627 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
4628                                ArrayRef<const Expr *> PrivateVars,
4629                                ArrayRef<const Expr *> FirstprivateVars,
4630                                ArrayRef<const Expr *> LastprivateVars,
4631                                QualType PrivatesQTy,
4632                                ArrayRef<PrivateDataTy> Privates) {
4633   ASTContext &C = CGM.getContext();
4634   FunctionArgList Args;
4635   ImplicitParamDecl TaskPrivatesArg(
4636       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4637       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
4638       ImplicitParamDecl::Other);
4639   Args.push_back(&TaskPrivatesArg);
4640   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
4641   unsigned Counter = 1;
4642   for (const Expr *E : PrivateVars) {
4643     Args.push_back(ImplicitParamDecl::Create(
4644         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4645         C.getPointerType(C.getPointerType(E->getType()))
4646             .withConst()
4647             .withRestrict(),
4648         ImplicitParamDecl::Other));
4649     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4650     PrivateVarsPos[VD] = Counter;
4651     ++Counter;
4652   }
4653   for (const Expr *E : FirstprivateVars) {
4654     Args.push_back(ImplicitParamDecl::Create(
4655         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4656         C.getPointerType(C.getPointerType(E->getType()))
4657             .withConst()
4658             .withRestrict(),
4659         ImplicitParamDecl::Other));
4660     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4661     PrivateVarsPos[VD] = Counter;
4662     ++Counter;
4663   }
4664   for (const Expr *E : LastprivateVars) {
4665     Args.push_back(ImplicitParamDecl::Create(
4666         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4667         C.getPointerType(C.getPointerType(E->getType()))
4668             .withConst()
4669             .withRestrict(),
4670         ImplicitParamDecl::Other));
4671     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4672     PrivateVarsPos[VD] = Counter;
4673     ++Counter;
4674   }
4675   const auto &TaskPrivatesMapFnInfo =
4676       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4677   llvm::FunctionType *TaskPrivatesMapTy =
4678       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
4679   std::string Name =
4680       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
4681   auto *TaskPrivatesMap = llvm::Function::Create(
4682       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
4683       &CGM.getModule());
4684   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
4685                                     TaskPrivatesMapFnInfo);
4686   if (CGM.getLangOpts().Optimize) {
4687     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
4688     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
4689     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
4690   }
4691   CodeGenFunction CGF(CGM);
4692   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
4693                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
4694 
4695   // *privi = &.privates.privi;
4696   LValue Base = CGF.EmitLoadOfPointerLValue(
4697       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
4698       TaskPrivatesArg.getType()->castAs<PointerType>());
4699   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
4700   Counter = 0;
4701   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
4702     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
4703     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
4704     LValue RefLVal =
4705         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
4706     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
4707         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
4708     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
4709     ++Counter;
4710   }
4711   CGF.FinishFunction();
4712   return TaskPrivatesMap;
4713 }
4714 
4715 /// Emit initialization for private variables in task-based directives.
4716 static void emitPrivatesInit(CodeGenFunction &CGF,
4717                              const OMPExecutableDirective &D,
4718                              Address KmpTaskSharedsPtr, LValue TDBase,
4719                              const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4720                              QualType SharedsTy, QualType SharedsPtrTy,
4721                              const OMPTaskDataTy &Data,
4722                              ArrayRef<PrivateDataTy> Privates, bool ForDup) {
4723   ASTContext &C = CGF.getContext();
4724   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4725   LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
4726   OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
4727                                  ? OMPD_taskloop
4728                                  : OMPD_task;
4729   const CapturedStmt &CS = *D.getCapturedStmt(Kind);
4730   CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
4731   LValue SrcBase;
4732   bool IsTargetTask =
4733       isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
4734       isOpenMPTargetExecutionDirective(D.getDirectiveKind());
4735   // For target-based directives skip 3 firstprivate arrays BasePointersArray,
4736   // PointersArray and SizesArray. The original variables for these arrays are
4737   // not captured and we get their addresses explicitly.
4738   if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
4739       (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
4740     SrcBase = CGF.MakeAddrLValue(
4741         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4742             KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
4743         SharedsTy);
4744   }
4745   FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
4746   for (const PrivateDataTy &Pair : Privates) {
4747     const VarDecl *VD = Pair.second.PrivateCopy;
4748     const Expr *Init = VD->getAnyInitializer();
4749     if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
4750                              !CGF.isTrivialInitializer(Init)))) {
4751       LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
4752       if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
4753         const VarDecl *OriginalVD = Pair.second.Original;
4754         // Check if the variable is the target-based BasePointersArray,
4755         // PointersArray or SizesArray.
4756         LValue SharedRefLValue;
4757         QualType Type = PrivateLValue.getType();
4758         const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
4759         if (IsTargetTask && !SharedField) {
4760           assert(isa<ImplicitParamDecl>(OriginalVD) &&
4761                  isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
4762                  cast<CapturedDecl>(OriginalVD->getDeclContext())
4763                          ->getNumParams() == 0 &&
4764                  isa<TranslationUnitDecl>(
4765                      cast<CapturedDecl>(OriginalVD->getDeclContext())
4766                          ->getDeclContext()) &&
4767                  "Expected artificial target data variable.");
4768           SharedRefLValue =
4769               CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
4770         } else if (ForDup) {
4771           SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
4772           SharedRefLValue = CGF.MakeAddrLValue(
4773               Address(SharedRefLValue.getPointer(CGF),
4774                       C.getDeclAlign(OriginalVD)),
4775               SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
4776               SharedRefLValue.getTBAAInfo());
4777         } else {
4778           InlinedOpenMPRegionRAII Region(
4779               CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
4780               /*HasCancel=*/false);
4781           SharedRefLValue =  CGF.EmitLValue(Pair.second.OriginalRef);
4782         }
4783         if (Type->isArrayType()) {
4784           // Initialize firstprivate array.
4785           if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
4786             // Perform simple memcpy.
4787             CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
4788           } else {
4789             // Initialize firstprivate array using element-by-element
4790             // initialization.
4791             CGF.EmitOMPAggregateAssign(
4792                 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
4793                 Type,
4794                 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
4795                                                   Address SrcElement) {
4796                   // Clean up any temporaries needed by the initialization.
4797                   CodeGenFunction::OMPPrivateScope InitScope(CGF);
4798                   InitScope.addPrivate(
4799                       Elem, [SrcElement]() -> Address { return SrcElement; });
4800                   (void)InitScope.Privatize();
4801                   // Emit initialization for single element.
4802                   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
4803                       CGF, &CapturesInfo);
4804                   CGF.EmitAnyExprToMem(Init, DestElement,
4805                                        Init->getType().getQualifiers(),
4806                                        /*IsInitializer=*/false);
4807                 });
4808           }
4809         } else {
4810           CodeGenFunction::OMPPrivateScope InitScope(CGF);
4811           InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
4812             return SharedRefLValue.getAddress(CGF);
4813           });
4814           (void)InitScope.Privatize();
4815           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
4816           CGF.EmitExprAsInit(Init, VD, PrivateLValue,
4817                              /*capturedByInit=*/false);
4818         }
4819       } else {
4820         CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
4821       }
4822     }
4823     ++FI;
4824   }
4825 }
4826 
4827 /// Check if duplication function is required for taskloops.
4828 static bool checkInitIsRequired(CodeGenFunction &CGF,
4829                                 ArrayRef<PrivateDataTy> Privates) {
4830   bool InitRequired = false;
4831   for (const PrivateDataTy &Pair : Privates) {
4832     const VarDecl *VD = Pair.second.PrivateCopy;
4833     const Expr *Init = VD->getAnyInitializer();
4834     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
4835                                     !CGF.isTrivialInitializer(Init));
4836     if (InitRequired)
4837       break;
4838   }
4839   return InitRequired;
4840 }
4841 
4842 
4843 /// Emit task_dup function (for initialization of
4844 /// private/firstprivate/lastprivate vars and last_iter flag)
4845 /// \code
4846 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
4847 /// lastpriv) {
4848 /// // setup lastprivate flag
4849 ///    task_dst->last = lastpriv;
4850 /// // could be constructor calls here...
4851 /// }
4852 /// \endcode
4853 static llvm::Value *
4854 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
4855                     const OMPExecutableDirective &D,
4856                     QualType KmpTaskTWithPrivatesPtrQTy,
4857                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4858                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
4859                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
4860                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
4861   ASTContext &C = CGM.getContext();
4862   FunctionArgList Args;
4863   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4864                            KmpTaskTWithPrivatesPtrQTy,
4865                            ImplicitParamDecl::Other);
4866   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4867                            KmpTaskTWithPrivatesPtrQTy,
4868                            ImplicitParamDecl::Other);
4869   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
4870                                 ImplicitParamDecl::Other);
4871   Args.push_back(&DstArg);
4872   Args.push_back(&SrcArg);
4873   Args.push_back(&LastprivArg);
4874   const auto &TaskDupFnInfo =
4875       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4876   llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
4877   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
4878   auto *TaskDup = llvm::Function::Create(
4879       TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4880   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
4881   TaskDup->setDoesNotRecurse();
4882   CodeGenFunction CGF(CGM);
4883   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
4884                     Loc);
4885 
4886   LValue TDBase = CGF.EmitLoadOfPointerLValue(
4887       CGF.GetAddrOfLocalVar(&DstArg),
4888       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4889   // task_dst->liter = lastpriv;
4890   if (WithLastIter) {
4891     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4892     LValue Base = CGF.EmitLValueForField(
4893         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4894     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4895     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
4896         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
4897     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
4898   }
4899 
4900   // Emit initial values for private copies (if any).
4901   assert(!Privates.empty());
4902   Address KmpTaskSharedsPtr = Address::invalid();
4903   if (!Data.FirstprivateVars.empty()) {
4904     LValue TDBase = CGF.EmitLoadOfPointerLValue(
4905         CGF.GetAddrOfLocalVar(&SrcArg),
4906         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4907     LValue Base = CGF.EmitLValueForField(
4908         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4909     KmpTaskSharedsPtr = Address(
4910         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
4911                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
4912                                                   KmpTaskTShareds)),
4913                              Loc),
4914         CGF.getNaturalTypeAlignment(SharedsTy));
4915   }
4916   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
4917                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
4918   CGF.FinishFunction();
4919   return TaskDup;
4920 }
4921 
4922 /// Checks if destructor function is required to be generated.
4923 /// \return true if cleanups are required, false otherwise.
4924 static bool
4925 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
4926   bool NeedsCleanup = false;
4927   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4928   const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
4929   for (const FieldDecl *FD : PrivateRD->fields()) {
4930     NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
4931     if (NeedsCleanup)
4932       break;
4933   }
4934   return NeedsCleanup;
4935 }
4936 
4937 CGOpenMPRuntime::TaskResultTy
4938 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
4939                               const OMPExecutableDirective &D,
4940                               llvm::Function *TaskFunction, QualType SharedsTy,
4941                               Address Shareds, const OMPTaskDataTy &Data) {
4942   ASTContext &C = CGM.getContext();
4943   llvm::SmallVector<PrivateDataTy, 4> Privates;
4944   // Aggregate privates and sort them by the alignment.
4945   const auto *I = Data.PrivateCopies.begin();
4946   for (const Expr *E : Data.PrivateVars) {
4947     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4948     Privates.emplace_back(
4949         C.getDeclAlign(VD),
4950         PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4951                          /*PrivateElemInit=*/nullptr));
4952     ++I;
4953   }
4954   I = Data.FirstprivateCopies.begin();
4955   const auto *IElemInitRef = Data.FirstprivateInits.begin();
4956   for (const Expr *E : Data.FirstprivateVars) {
4957     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4958     Privates.emplace_back(
4959         C.getDeclAlign(VD),
4960         PrivateHelpersTy(
4961             E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4962             cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
4963     ++I;
4964     ++IElemInitRef;
4965   }
4966   I = Data.LastprivateCopies.begin();
4967   for (const Expr *E : Data.LastprivateVars) {
4968     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4969     Privates.emplace_back(
4970         C.getDeclAlign(VD),
4971         PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4972                          /*PrivateElemInit=*/nullptr));
4973     ++I;
4974   }
4975   llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
4976     return L.first > R.first;
4977   });
4978   QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
4979   // Build type kmp_routine_entry_t (if not built yet).
4980   emitKmpRoutineEntryT(KmpInt32Ty);
4981   // Build type kmp_task_t (if not built yet).
4982   if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
4983     if (SavedKmpTaskloopTQTy.isNull()) {
4984       SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4985           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4986     }
4987     KmpTaskTQTy = SavedKmpTaskloopTQTy;
4988   } else {
4989     assert((D.getDirectiveKind() == OMPD_task ||
4990             isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
4991             isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
4992            "Expected taskloop, task or target directive");
4993     if (SavedKmpTaskTQTy.isNull()) {
4994       SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4995           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4996     }
4997     KmpTaskTQTy = SavedKmpTaskTQTy;
4998   }
4999   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
5000   // Build particular struct kmp_task_t for the given task.
5001   const RecordDecl *KmpTaskTWithPrivatesQTyRD =
5002       createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
5003   QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
5004   QualType KmpTaskTWithPrivatesPtrQTy =
5005       C.getPointerType(KmpTaskTWithPrivatesQTy);
5006   llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
5007   llvm::Type *KmpTaskTWithPrivatesPtrTy =
5008       KmpTaskTWithPrivatesTy->getPointerTo();
5009   llvm::Value *KmpTaskTWithPrivatesTySize =
5010       CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
5011   QualType SharedsPtrTy = C.getPointerType(SharedsTy);
5012 
5013   // Emit initial values for private copies (if any).
5014   llvm::Value *TaskPrivatesMap = nullptr;
5015   llvm::Type *TaskPrivatesMapTy =
5016       std::next(TaskFunction->arg_begin(), 3)->getType();
5017   if (!Privates.empty()) {
5018     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
5019     TaskPrivatesMap = emitTaskPrivateMappingFunction(
5020         CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
5021         FI->getType(), Privates);
5022     TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5023         TaskPrivatesMap, TaskPrivatesMapTy);
5024   } else {
5025     TaskPrivatesMap = llvm::ConstantPointerNull::get(
5026         cast<llvm::PointerType>(TaskPrivatesMapTy));
5027   }
5028   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
5029   // kmp_task_t *tt);
5030   llvm::Function *TaskEntry = emitProxyTaskFunction(
5031       CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
5032       KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
5033       TaskPrivatesMap);
5034 
5035   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
5036   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
5037   // kmp_routine_entry_t *task_entry);
5038   // Task flags. Format is taken from
5039   // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
5040   // description of kmp_tasking_flags struct.
5041   enum {
5042     TiedFlag = 0x1,
5043     FinalFlag = 0x2,
5044     DestructorsFlag = 0x8,
5045     PriorityFlag = 0x20,
5046     DetachableFlag = 0x40,
5047   };
5048   unsigned Flags = Data.Tied ? TiedFlag : 0;
5049   bool NeedsCleanup = false;
5050   if (!Privates.empty()) {
5051     NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
5052     if (NeedsCleanup)
5053       Flags = Flags | DestructorsFlag;
5054   }
5055   if (Data.Priority.getInt())
5056     Flags = Flags | PriorityFlag;
5057   if (D.hasClausesOfKind<OMPDetachClause>())
5058     Flags = Flags | DetachableFlag;
5059   llvm::Value *TaskFlags =
5060       Data.Final.getPointer()
5061           ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
5062                                      CGF.Builder.getInt32(FinalFlag),
5063                                      CGF.Builder.getInt32(/*C=*/0))
5064           : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
5065   TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
5066   llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
5067   SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
5068       getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
5069       SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5070           TaskEntry, KmpRoutineEntryPtrTy)};
5071   llvm::Value *NewTask;
5072   if (D.hasClausesOfKind<OMPNowaitClause>()) {
5073     // Check if we have any device clause associated with the directive.
5074     const Expr *Device = nullptr;
5075     if (auto *C = D.getSingleClause<OMPDeviceClause>())
5076       Device = C->getDevice();
5077     // Emit device ID if any otherwise use default value.
5078     llvm::Value *DeviceID;
5079     if (Device)
5080       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
5081                                            CGF.Int64Ty, /*isSigned=*/true);
5082     else
5083       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
5084     AllocArgs.push_back(DeviceID);
5085     NewTask = CGF.EmitRuntimeCall(
5086       createRuntimeFunction(OMPRTL__kmpc_omp_target_task_alloc), AllocArgs);
5087   } else {
5088     NewTask = CGF.EmitRuntimeCall(
5089       createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
5090   }
5091   // Emit detach clause initialization.
5092   // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
5093   // task_descriptor);
5094   if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
5095     const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
5096     LValue EvtLVal = CGF.EmitLValue(Evt);
5097 
5098     // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
5099     // int gtid, kmp_task_t *task);
5100     llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
5101     llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
5102     Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
5103     llvm::Value *EvtVal = CGF.EmitRuntimeCall(
5104         createRuntimeFunction(OMPRTL__kmpc_task_allow_completion_event),
5105         {Loc, Tid, NewTask});
5106     EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
5107                                       Evt->getExprLoc());
5108     CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
5109   }
5110   llvm::Value *NewTaskNewTaskTTy =
5111       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5112           NewTask, KmpTaskTWithPrivatesPtrTy);
5113   LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
5114                                                KmpTaskTWithPrivatesQTy);
5115   LValue TDBase =
5116       CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
5117   // Fill the data in the resulting kmp_task_t record.
5118   // Copy shareds if there are any.
5119   Address KmpTaskSharedsPtr = Address::invalid();
5120   if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
5121     KmpTaskSharedsPtr =
5122         Address(CGF.EmitLoadOfScalar(
5123                     CGF.EmitLValueForField(
5124                         TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
5125                                            KmpTaskTShareds)),
5126                     Loc),
5127                 CGF.getNaturalTypeAlignment(SharedsTy));
5128     LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
5129     LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
5130     CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
5131   }
5132   // Emit initial values for private copies (if any).
5133   TaskResultTy Result;
5134   if (!Privates.empty()) {
5135     emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
5136                      SharedsTy, SharedsPtrTy, Data, Privates,
5137                      /*ForDup=*/false);
5138     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
5139         (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
5140       Result.TaskDupFn = emitTaskDupFunction(
5141           CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
5142           KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
5143           /*WithLastIter=*/!Data.LastprivateVars.empty());
5144     }
5145   }
5146   // Fields of union "kmp_cmplrdata_t" for destructors and priority.
5147   enum { Priority = 0, Destructors = 1 };
5148   // Provide pointer to function with destructors for privates.
5149   auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
5150   const RecordDecl *KmpCmplrdataUD =
5151       (*FI)->getType()->getAsUnionType()->getDecl();
5152   if (NeedsCleanup) {
5153     llvm::Value *DestructorFn = emitDestructorsFunction(
5154         CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
5155         KmpTaskTWithPrivatesQTy);
5156     LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
5157     LValue DestructorsLV = CGF.EmitLValueForField(
5158         Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
5159     CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5160                               DestructorFn, KmpRoutineEntryPtrTy),
5161                           DestructorsLV);
5162   }
5163   // Set priority.
5164   if (Data.Priority.getInt()) {
5165     LValue Data2LV = CGF.EmitLValueForField(
5166         TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
5167     LValue PriorityLV = CGF.EmitLValueForField(
5168         Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
5169     CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
5170   }
5171   Result.NewTask = NewTask;
5172   Result.TaskEntry = TaskEntry;
5173   Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
5174   Result.TDBase = TDBase;
5175   Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
5176   return Result;
5177 }
5178 
namespace {
/// Dependence kind for RTL.
enum RTLDependenceKindTy {
  /// Code used for 'in' dependencies.
  DepIn = 0x1,
  /// Code shared by 'out' and 'inout' dependencies.
  DepInOut = 0x3,
  /// Code used for 'mutexinoutset' dependencies.
  DepMutexInOutSet = 0x4
};

/// Fields ids in kmp_depend_info record.
enum RTLDependInfoFieldsTy {
  BaseAddr = 0,
  Len = 1,
  Flags = 2
};
} // namespace
5189 
5190 /// Translates internal dependency kind into the runtime kind.
5191 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
5192   RTLDependenceKindTy DepKind;
5193   switch (K) {
5194   case OMPC_DEPEND_in:
5195     DepKind = DepIn;
5196     break;
5197   // Out and InOut dependencies must use the same code.
5198   case OMPC_DEPEND_out:
5199   case OMPC_DEPEND_inout:
5200     DepKind = DepInOut;
5201     break;
5202   case OMPC_DEPEND_mutexinoutset:
5203     DepKind = DepMutexInOutSet;
5204     break;
5205   case OMPC_DEPEND_source:
5206   case OMPC_DEPEND_sink:
5207   case OMPC_DEPEND_depobj:
5208   case OMPC_DEPEND_unknown:
5209     llvm_unreachable("Unknown task dependence type");
5210   }
5211   return DepKind;
5212 }
5213 
5214 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
5215 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
5216                            QualType &FlagsTy) {
5217   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
5218   if (KmpDependInfoTy.isNull()) {
5219     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
5220     KmpDependInfoRD->startDefinition();
5221     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
5222     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
5223     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
5224     KmpDependInfoRD->completeDefinition();
5225     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
5226   }
5227 }
5228 
5229 std::pair<llvm::Value *, LValue>
5230 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
5231                                    SourceLocation Loc) {
5232   ASTContext &C = CGM.getContext();
5233   QualType FlagsTy;
5234   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5235   RecordDecl *KmpDependInfoRD =
5236       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5237   LValue Base = CGF.EmitLoadOfPointerLValue(
5238       DepobjLVal.getAddress(CGF),
5239       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5240   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5241   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5242           Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
5243   Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
5244                             Base.getTBAAInfo());
5245   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5246       Addr.getPointer(),
5247       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5248   LValue NumDepsBase = CGF.MakeAddrLValue(
5249       Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
5250       Base.getBaseInfo(), Base.getTBAAInfo());
5251   // NumDeps = deps[i].base_addr;
5252   LValue BaseAddrLVal = CGF.EmitLValueForField(
5253       NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
5254   llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
5255   return std::make_pair(NumDeps, Base);
5256 }
5257 
5258 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
5259     CodeGenFunction &CGF,
5260     ArrayRef<std::pair<OpenMPDependClauseKind, const Expr *>> Dependencies,
5261     bool ForDepobj, SourceLocation Loc) {
5262   // Process list of dependencies.
5263   ASTContext &C = CGM.getContext();
5264   Address DependenciesArray = Address::invalid();
5265   unsigned NumDependencies = Dependencies.size();
5266   llvm::Value *NumOfElements = nullptr;
5267   if (NumDependencies) {
5268     QualType FlagsTy;
5269     getDependTypes(C, KmpDependInfoTy, FlagsTy);
5270     RecordDecl *KmpDependInfoRD =
5271         cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5272     llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5273     unsigned NumDepobjDependecies = 0;
5274     SmallVector<std::pair<llvm::Value *, LValue>, 4> Depobjs;
5275     llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
5276     // Calculate number of depobj dependecies.
5277     for (const std::pair<OpenMPDependClauseKind, const Expr *> &Pair :
5278          Dependencies) {
5279       if (Pair.first != OMPC_DEPEND_depobj)
5280         continue;
5281       LValue DepobjLVal = CGF.EmitLValue(Pair.second);
5282       llvm::Value *NumDeps;
5283       LValue Base;
5284       std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
5285       NumOfDepobjElements =
5286           CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumDeps);
5287       Depobjs.emplace_back(NumDeps, Base);
5288       ++NumDepobjDependecies;
5289     }
5290 
5291     QualType KmpDependInfoArrayTy;
5292     // Define type kmp_depend_info[<Dependencies.size()>];
5293     // For depobj reserve one extra element to store the number of elements.
5294     // It is required to handle depobj(x) update(in) construct.
5295     // kmp_depend_info[<Dependencies.size()>] deps;
5296     if (ForDepobj) {
5297       assert(NumDepobjDependecies == 0 &&
5298              "depobj dependency kind is not expected in depobj directive.");
5299       KmpDependInfoArrayTy = C.getConstantArrayType(
5300           KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
5301           nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
5302       // Need to allocate on the dynamic memory.
5303       llvm::Value *ThreadID = getThreadID(CGF, Loc);
5304       // Use default allocator.
5305       llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5306       CharUnits Align = C.getTypeAlignInChars(KmpDependInfoArrayTy);
5307       CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
5308       llvm::Value *Size = CGF.CGM.getSize(Sz.alignTo(Align));
5309       llvm::Value *Args[] = {ThreadID, Size, Allocator};
5310 
5311       llvm::Value *Addr = CGF.EmitRuntimeCall(
5312           createRuntimeFunction(OMPRTL__kmpc_alloc), Args, ".dep.arr.addr");
5313       Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5314           Addr, CGF.ConvertTypeForMem(KmpDependInfoArrayTy)->getPointerTo());
5315       DependenciesArray = Address(Addr, Align);
5316       NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
5317                                              /*isSigned=*/false);
5318     } else if (NumDepobjDependecies > 0) {
5319       NumOfElements = CGF.Builder.CreateNUWAdd(
5320           NumOfDepobjElements,
5321           llvm::ConstantInt::get(CGM.IntPtrTy,
5322                                  NumDependencies - NumDepobjDependecies,
5323                                  /*isSigned=*/false));
5324       NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
5325                                                 /*isSigned=*/false);
5326       OpaqueValueExpr OVE(
5327           Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
5328           VK_RValue);
5329       CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
5330                                                     RValue::get(NumOfElements));
5331       KmpDependInfoArrayTy =
5332           C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
5333                                  /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
5334       // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
5335       // Properly emit variable-sized array.
5336       auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
5337                                            ImplicitParamDecl::Other);
5338       CGF.EmitVarDecl(*PD);
5339       DependenciesArray = CGF.GetAddrOfLocalVar(PD);
5340     } else {
5341       KmpDependInfoArrayTy = C.getConstantArrayType(
5342           KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
5343           nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
5344       DependenciesArray =
5345           CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
5346       NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
5347                                              /*isSigned=*/false);
5348     }
5349     if (ForDepobj) {
5350       // Write number of elements in the first element of array for depobj.
5351       llvm::Value *NumVal =
5352           llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
5353       LValue Base = CGF.MakeAddrLValue(
5354           CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0),
5355           KmpDependInfoTy);
5356       // deps[i].base_addr = NumDependencies;
5357       LValue BaseAddrLVal = CGF.EmitLValueForField(
5358           Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
5359       CGF.EmitStoreOfScalar(NumVal, BaseAddrLVal);
5360     }
5361     unsigned Pos = ForDepobj ? 1 : 0;
5362     for (unsigned I = 0; I < NumDependencies; ++I) {
5363       if (Dependencies[I].first == OMPC_DEPEND_depobj)
5364         continue;
5365       const Expr *E = Dependencies[I].second;
5366       const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
5367       llvm::Value *Addr;
5368       if (OASE) {
5369         const Expr *Base = OASE->getBase();
5370         Addr = CGF.EmitScalarExpr(Base);
5371       } else {
5372         Addr = CGF.EmitLValue(E).getPointer(CGF);
5373       }
5374       llvm::Value *Size;
5375       QualType Ty = E->getType();
5376       if (OASE) {
5377         Size = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
5378         for (const Expr *SE : OASE->getDimensions()) {
5379            llvm::Value *Sz = CGF.EmitScalarExpr(SE);
5380            Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
5381                                     CGF.getContext().getSizeType(),
5382                                     SE->getExprLoc());
5383            Size = CGF.Builder.CreateNUWMul(Size, Sz);
5384         }
5385       } else if (const auto *ASE =
5386                      dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
5387         LValue UpAddrLVal =
5388             CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
5389         llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
5390             UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
5391         llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGM.SizeTy);
5392         llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
5393         Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
5394       } else {
5395         Size = CGF.getTypeSize(Ty);
5396       }
5397       LValue Base;
5398       if (NumDepobjDependecies > 0) {
5399         Base = CGF.MakeAddrLValue(
5400             CGF.Builder.CreateConstGEP(DependenciesArray, Pos),
5401             KmpDependInfoTy);
5402       } else {
5403         Base = CGF.MakeAddrLValue(
5404             CGF.Builder.CreateConstArrayGEP(DependenciesArray, Pos),
5405             KmpDependInfoTy);
5406       }
5407       // deps[i].base_addr = &<Dependencies[i].second>;
5408       LValue BaseAddrLVal = CGF.EmitLValueForField(
5409           Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
5410       CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
5411                             BaseAddrLVal);
5412       // deps[i].len = sizeof(<Dependencies[i].second>);
5413       LValue LenLVal = CGF.EmitLValueForField(
5414           Base, *std::next(KmpDependInfoRD->field_begin(), Len));
5415       CGF.EmitStoreOfScalar(Size, LenLVal);
5416       // deps[i].flags = <Dependencies[i].first>;
5417       RTLDependenceKindTy DepKind =
5418           translateDependencyKind(Dependencies[I].first);
5419       LValue FlagsLVal = CGF.EmitLValueForField(
5420           Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5421       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5422                             FlagsLVal);
5423       ++Pos;
5424     }
5425     // Copy final depobj arrays.
5426     if (NumDepobjDependecies > 0) {
5427       llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
5428       Address Addr = CGF.Builder.CreateConstGEP(DependenciesArray, Pos);
5429       for (const std::pair<llvm::Value *, LValue> &Pair : Depobjs) {
5430         llvm::Value *Size = CGF.Builder.CreateNUWMul(ElSize, Pair.first);
5431         CGF.Builder.CreateMemCpy(Addr, Pair.second.getAddress(CGF), Size);
5432         Addr =
5433             Address(CGF.Builder.CreateGEP(
5434                         Addr.getElementType(), Addr.getPointer(), Pair.first),
5435                     DependenciesArray.getAlignment().alignmentOfArrayElement(
5436                         C.getTypeSizeInChars(KmpDependInfoTy)));
5437       }
5438       DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5439           DependenciesArray, CGF.VoidPtrTy);
5440     } else {
5441       DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5442           CGF.Builder.CreateConstArrayGEP(DependenciesArray, ForDepobj ? 1 : 0),
5443           CGF.VoidPtrTy);
5444     }
5445   }
5446   return std::make_pair(NumOfElements, DependenciesArray);
5447 }
5448 
5449 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
5450                                         SourceLocation Loc) {
5451   ASTContext &C = CGM.getContext();
5452   QualType FlagsTy;
5453   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5454   LValue Base = CGF.EmitLoadOfPointerLValue(
5455       DepobjLVal.getAddress(CGF),
5456       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5457   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5458   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5459       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
5460   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5461       Addr.getPointer(),
5462       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5463   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
5464                                                                CGF.VoidPtrTy);
5465   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5466   // Use default allocator.
5467   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5468   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5469 
5470   // _kmpc_free(gtid, addr, nullptr);
5471   (void)CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_free), Args);
5472 }
5473 
5474 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
5475                                        OpenMPDependClauseKind NewDepKind,
5476                                        SourceLocation Loc) {
5477   ASTContext &C = CGM.getContext();
5478   QualType FlagsTy;
5479   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5480   RecordDecl *KmpDependInfoRD =
5481       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5482   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5483   llvm::Value *NumDeps;
5484   LValue Base;
5485   std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
5486 
5487   Address Begin = Base.getAddress(CGF);
5488   // Cast from pointer to array type to pointer to single element.
5489   llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
5490   // The basic structure here is a while-do loop.
5491   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
5492   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
5493   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5494   CGF.EmitBlock(BodyBB);
5495   llvm::PHINode *ElementPHI =
5496       CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
5497   ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
5498   Begin = Address(ElementPHI, Begin.getAlignment());
5499   Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
5500                             Base.getTBAAInfo());
5501   // deps[i].flags = NewDepKind;
5502   RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
5503   LValue FlagsLVal = CGF.EmitLValueForField(
5504       Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5505   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5506                         FlagsLVal);
5507 
5508   // Shift the address forward by one element.
5509   Address ElementNext =
5510       CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
5511   ElementPHI->addIncoming(ElementNext.getPointer(),
5512                           CGF.Builder.GetInsertBlock());
5513   llvm::Value *IsEmpty =
5514       CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
5515   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5516   // Done.
5517   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5518 }
5519 
5520 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
5521                                    const OMPExecutableDirective &D,
5522                                    llvm::Function *TaskFunction,
5523                                    QualType SharedsTy, Address Shareds,
5524                                    const Expr *IfCond,
5525                                    const OMPTaskDataTy &Data) {
5526   if (!CGF.HaveInsertPoint())
5527     return;
5528 
5529   TaskResultTy Result =
5530       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5531   llvm::Value *NewTask = Result.NewTask;
5532   llvm::Function *TaskEntry = Result.TaskEntry;
5533   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
5534   LValue TDBase = Result.TDBase;
5535   const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
5536   // Process list of dependences.
5537   Address DependenciesArray = Address::invalid();
5538   llvm::Value *NumOfElements;
5539   std::tie(NumOfElements, DependenciesArray) =
5540       emitDependClause(CGF, Data.Dependences, /*ForDepobj=*/false, Loc);
5541 
5542   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5543   // libcall.
5544   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5545   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5546   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
5547   // list is not empty
5548   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5549   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5550   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5551   llvm::Value *DepTaskArgs[7];
5552   if (!Data.Dependences.empty()) {
5553     DepTaskArgs[0] = UpLoc;
5554     DepTaskArgs[1] = ThreadID;
5555     DepTaskArgs[2] = NewTask;
5556     DepTaskArgs[3] = NumOfElements;
5557     DepTaskArgs[4] = DependenciesArray.getPointer();
5558     DepTaskArgs[5] = CGF.Builder.getInt32(0);
5559     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5560   }
5561   auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
5562                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5563     if (!Data.Tied) {
5564       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5565       LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5566       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5567     }
5568     if (!Data.Dependences.empty()) {
5569       CGF.EmitRuntimeCall(
5570           createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
5571     } else {
5572       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
5573                           TaskArgs);
5574     }
5575     // Check if parent region is untied and build return for untied task;
5576     if (auto *Region =
5577             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5578       Region->emitUntiedSwitch(CGF);
5579   };
5580 
5581   llvm::Value *DepWaitTaskArgs[6];
5582   if (!Data.Dependences.empty()) {
5583     DepWaitTaskArgs[0] = UpLoc;
5584     DepWaitTaskArgs[1] = ThreadID;
5585     DepWaitTaskArgs[2] = NumOfElements;
5586     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5587     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5588     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5589   }
5590   auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
5591                         &Data, &DepWaitTaskArgs,
5592                         Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5593     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5594     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5595     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5596     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5597     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5598     // is specified.
5599     if (!Data.Dependences.empty())
5600       CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
5601                           DepWaitTaskArgs);
5602     // Call proxy_task_entry(gtid, new_task);
5603     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
5604                       Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
5605       Action.Enter(CGF);
5606       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
5607       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
5608                                                           OutlinedFnArgs);
5609     };
5610 
5611     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
5612     // kmp_task_t *new_task);
5613     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
5614     // kmp_task_t *new_task);
5615     RegionCodeGenTy RCG(CodeGen);
5616     CommonActionTy Action(
5617         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
5618         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
5619     RCG.setAction(Action);
5620     RCG(CGF);
5621   };
5622 
5623   if (IfCond) {
5624     emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
5625   } else {
5626     RegionCodeGenTy ThenRCG(ThenCodeGen);
5627     ThenRCG(CGF);
5628   }
5629 }
5630 
5631 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
5632                                        const OMPLoopDirective &D,
5633                                        llvm::Function *TaskFunction,
5634                                        QualType SharedsTy, Address Shareds,
5635                                        const Expr *IfCond,
5636                                        const OMPTaskDataTy &Data) {
5637   if (!CGF.HaveInsertPoint())
5638     return;
5639   TaskResultTy Result =
5640       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5641   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5642   // libcall.
5643   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
5644   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
5645   // sched, kmp_uint64 grainsize, void *task_dup);
5646   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5647   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5648   llvm::Value *IfVal;
5649   if (IfCond) {
5650     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
5651                                       /*isSigned=*/true);
5652   } else {
5653     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
5654   }
5655 
5656   LValue LBLVal = CGF.EmitLValueForField(
5657       Result.TDBase,
5658       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
5659   const auto *LBVar =
5660       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
5661   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
5662                        LBLVal.getQuals(),
5663                        /*IsInitializer=*/true);
5664   LValue UBLVal = CGF.EmitLValueForField(
5665       Result.TDBase,
5666       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
5667   const auto *UBVar =
5668       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
5669   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
5670                        UBLVal.getQuals(),
5671                        /*IsInitializer=*/true);
5672   LValue StLVal = CGF.EmitLValueForField(
5673       Result.TDBase,
5674       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
5675   const auto *StVar =
5676       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
5677   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
5678                        StLVal.getQuals(),
5679                        /*IsInitializer=*/true);
5680   // Store reductions address.
5681   LValue RedLVal = CGF.EmitLValueForField(
5682       Result.TDBase,
5683       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5684   if (Data.Reductions) {
5685     CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5686   } else {
5687     CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
5688                                CGF.getContext().VoidPtrTy);
5689   }
5690   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
5691   llvm::Value *TaskArgs[] = {
5692       UpLoc,
5693       ThreadID,
5694       Result.NewTask,
5695       IfVal,
5696       LBLVal.getPointer(CGF),
5697       UBLVal.getPointer(CGF),
5698       CGF.EmitLoadOfScalar(StLVal, Loc),
5699       llvm::ConstantInt::getSigned(
5700           CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
5701       llvm::ConstantInt::getSigned(
5702           CGF.IntTy, Data.Schedule.getPointer()
5703                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
5704                          : NoSchedule),
5705       Data.Schedule.getPointer()
5706           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
5707                                       /*isSigned=*/false)
5708           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
5709       Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5710                              Result.TaskDupFn, CGF.VoidPtrTy)
5711                        : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
5712   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
5713 }
5714 
5715 /// Emit reduction operation for each element of array (required for
5716 /// array sections) LHS op = RHS.
5717 /// \param Type Type of array.
5718 /// \param LHSVar Variable on the left side of the reduction operation
5719 /// (references element of array in original variable).
5720 /// \param RHSVar Variable on the right side of the reduction operation
5721 /// (references element of array in original variable).
5722 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5723 /// RHSVar.
5724 static void EmitOMPAggregateReduction(
5725     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
5726     const VarDecl *RHSVar,
5727     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
5728                                   const Expr *, const Expr *)> &RedOpGen,
5729     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
5730     const Expr *UpExpr = nullptr) {
5731   // Perform element-by-element initialization.
5732   QualType ElementTy;
5733   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
5734   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
5735 
5736   // Drill down to the base element type on both arrays.
5737   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
5738   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
5739 
5740   llvm::Value *RHSBegin = RHSAddr.getPointer();
5741   llvm::Value *LHSBegin = LHSAddr.getPointer();
5742   // Cast from pointer to array type to pointer to single element.
5743   llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
5744   // The basic structure here is a while-do loop.
5745   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
5746   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
5747   llvm::Value *IsEmpty =
5748       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
5749   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5750 
5751   // Enter the loop body, making that address the current address.
5752   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5753   CGF.EmitBlock(BodyBB);
5754 
5755   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
5756 
5757   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
5758       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
5759   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
5760   Address RHSElementCurrent =
5761       Address(RHSElementPHI,
5762               RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5763 
5764   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
5765       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
5766   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
5767   Address LHSElementCurrent =
5768       Address(LHSElementPHI,
5769               LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5770 
5771   // Emit copy.
5772   CodeGenFunction::OMPPrivateScope Scope(CGF);
5773   Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
5774   Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
5775   Scope.Privatize();
5776   RedOpGen(CGF, XExpr, EExpr, UpExpr);
5777   Scope.ForceCleanup();
5778 
5779   // Shift the address forward by one element.
5780   llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
5781       LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
5782   llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
5783       RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
5784   // Check whether we've reached the end.
5785   llvm::Value *Done =
5786       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
5787   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
5788   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
5789   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
5790 
5791   // Done.
5792   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5793 }
5794 
5795 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5796 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5797 /// UDR combiner function.
5798 static void emitReductionCombiner(CodeGenFunction &CGF,
5799                                   const Expr *ReductionOp) {
5800   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5801     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5802       if (const auto *DRE =
5803               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5804         if (const auto *DRD =
5805                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5806           std::pair<llvm::Function *, llvm::Function *> Reduction =
5807               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5808           RValue Func = RValue::get(Reduction.first);
5809           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5810           CGF.EmitIgnoredExpr(ReductionOp);
5811           return;
5812         }
5813   CGF.EmitIgnoredExpr(ReductionOp);
5814 }
5815 
5816 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
5817     SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
5818     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
5819     ArrayRef<const Expr *> ReductionOps) {
5820   ASTContext &C = CGM.getContext();
5821 
5822   // void reduction_func(void *LHSArg, void *RHSArg);
5823   FunctionArgList Args;
5824   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5825                            ImplicitParamDecl::Other);
5826   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5827                            ImplicitParamDecl::Other);
5828   Args.push_back(&LHSArg);
5829   Args.push_back(&RHSArg);
5830   const auto &CGFI =
5831       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5832   std::string Name = getName({"omp", "reduction", "reduction_func"});
5833   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
5834                                     llvm::GlobalValue::InternalLinkage, Name,
5835                                     &CGM.getModule());
5836   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
5837   Fn->setDoesNotRecurse();
5838   CodeGenFunction CGF(CGM);
5839   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5840 
5841   // Dst = (void*[n])(LHSArg);
5842   // Src = (void*[n])(RHSArg);
5843   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5844       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
5845       ArgsType), CGF.getPointerAlign());
5846   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5847       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
5848       ArgsType), CGF.getPointerAlign());
5849 
5850   //  ...
5851   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5852   //  ...
5853   CodeGenFunction::OMPPrivateScope Scope(CGF);
5854   auto IPriv = Privates.begin();
5855   unsigned Idx = 0;
5856   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5857     const auto *RHSVar =
5858         cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5859     Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
5860       return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
5861     });
5862     const auto *LHSVar =
5863         cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5864     Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
5865       return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
5866     });
5867     QualType PrivTy = (*IPriv)->getType();
5868     if (PrivTy->isVariablyModifiedType()) {
5869       // Get array size and emit VLA type.
5870       ++Idx;
5871       Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5872       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5873       const VariableArrayType *VLA =
5874           CGF.getContext().getAsVariableArrayType(PrivTy);
5875       const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5876       CodeGenFunction::OpaqueValueMapping OpaqueMap(
5877           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5878       CGF.EmitVariablyModifiedType(PrivTy);
5879     }
5880   }
5881   Scope.Privatize();
5882   IPriv = Privates.begin();
5883   auto ILHS = LHSExprs.begin();
5884   auto IRHS = RHSExprs.begin();
5885   for (const Expr *E : ReductionOps) {
5886     if ((*IPriv)->getType()->isArrayType()) {
5887       // Emit reduction for array section.
5888       const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5889       const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5890       EmitOMPAggregateReduction(
5891           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5892           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5893             emitReductionCombiner(CGF, E);
5894           });
5895     } else {
5896       // Emit reduction for array subscript or single variable.
5897       emitReductionCombiner(CGF, E);
5898     }
5899     ++IPriv;
5900     ++ILHS;
5901     ++IRHS;
5902   }
5903   Scope.ForceCleanup();
5904   CGF.FinishFunction();
5905   return Fn;
5906 }
5907 
5908 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5909                                                   const Expr *ReductionOp,
5910                                                   const Expr *PrivateRef,
5911                                                   const DeclRefExpr *LHS,
5912                                                   const DeclRefExpr *RHS) {
5913   if (PrivateRef->getType()->isArrayType()) {
5914     // Emit reduction for array section.
5915     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5916     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5917     EmitOMPAggregateReduction(
5918         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5919         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5920           emitReductionCombiner(CGF, ReductionOp);
5921         });
5922   } else {
5923     // Emit reduction for array subscript or single variable.
5924     emitReductionCombiner(CGF, ReductionOp);
5925   }
5926 }
5927 
/// Emits the code that combines reduction items at the end of a construct.
///
/// Nothing is emitted when \p CGF has no insert point. When
/// \p Options.SimpleReduction is set, only the per-item combiners are emitted
/// inline. Otherwise the function builds a list with the addresses of the
/// \p RHSExprs items (plus one extra slot holding the element count for every
/// variably modified item), emits the outlined reduce function, calls
/// __kmpc_reduce{_nowait} and switches on its result: case 1 emits the
/// combiners directly, case 2 emits them as atomic updates or, when the
/// combiner is not a plain assignment, inside a critical region.
///
/// \param CGF Function the code is emitted into.
/// \param Loc Source location passed to the runtime calls.
/// \param Privates Per-item expressions whose types decide whether an item is
/// handled as an array and whether it is variably modified.
/// \param LHSExprs Per-item references (cast to DeclRefExpr here) to the LHS
/// variables of the combiners.
/// \param RHSExprs Per-item references to the RHS variables of the combiners;
/// their addresses are stored into the reduction list.
/// \param ReductionOps Per-item combiner expressions.
/// \param Options WithNowait selects the *_nowait runtime entry points;
/// SimpleReduction requests the inline-only form.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  // Size is the number of list slots: one per item, plus one more for each
  // variably modified item.
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  // Idx is the slot being written; it runs ahead of I whenever an extra size
  // slot has been emitted.
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      // The element count is stored in the slot as an integer converted to a
      // pointer value.
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  // Note that <n> is the number of reduction items (RHSExprs.size()), while
  // sizeof(RedList) also covers the extra slots reserved for array sizes.
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
                                       : OMPRTL__kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  // Same per-item combiner loop as in the SimpleReduction path, wrapped as a
  // region so that the end-reduce action can be attached to it.
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
                                       : OMPRTL__kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      // XExpr/UpExpr are the two sides of the combiner when it is a plain
      // assignment; EExpr is the right operand of the binary operator found
      // in the update expression, if any.
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      // BO_Comma is the placeholder opcode used until a binary operator is
      // found in the update expression. The pointer named BO declared in the
      // if statement below shadows this variable only inside that statement.
      BinaryOperatorKind BO = BO_Comma;
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        // The combiner is an assignment: emit it through the atomic update
        // helper.
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Callback computing the updated value: VD is remapped to a
                // temporary holding XRValue and UpExpr is evaluated with that
                // mapping in effect.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  // Only the non-nowait form attaches an end-reduce action to the atomic
  // region.
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          createRuntimeFunction(OMPRTL__kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
6231 
6232 /// Generates unique name for artificial threadprivate variables.
6233 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
6234 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
6235                                       const Expr *Ref) {
6236   SmallString<256> Buffer;
6237   llvm::raw_svector_ostream Out(Buffer);
6238   const clang::DeclRefExpr *DE;
6239   const VarDecl *D = ::getBaseDecl(Ref, DE);
6240   if (!D)
6241     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
6242   D = D->getCanonicalDecl();
6243   std::string Name = CGM.getOpenMPRuntime().getName(
6244       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
6245   Out << Prefix << Name << "_"
6246       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
6247   return std::string(Out.str());
6248 }
6249 
6250 /// Emits reduction initializer function:
6251 /// \code
6252 /// void @.red_init(void* %arg) {
6253 /// %0 = bitcast void* %arg to <type>*
6254 /// store <type> <init>, <type>* %0
6255 /// ret void
6256 /// }
6257 /// \endcode
6258 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
6259                                            SourceLocation Loc,
6260                                            ReductionCodeGen &RCG, unsigned N) {
6261   ASTContext &C = CGM.getContext();
6262   FunctionArgList Args;
6263   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6264                           ImplicitParamDecl::Other);
6265   Args.emplace_back(&Param);
6266   const auto &FnInfo =
6267       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6268   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6269   std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
6270   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6271                                     Name, &CGM.getModule());
6272   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6273   Fn->setDoesNotRecurse();
6274   CodeGenFunction CGF(CGM);
6275   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6276   Address PrivateAddr = CGF.EmitLoadOfPointer(
6277       CGF.GetAddrOfLocalVar(&Param),
6278       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6279   llvm::Value *Size = nullptr;
6280   // If the size of the reduction item is non-constant, load it from global
6281   // threadprivate variable.
6282   if (RCG.getSizes(N).second) {
6283     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6284         CGF, CGM.getContext().getSizeType(),
6285         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6286     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6287                                 CGM.getContext().getSizeType(), Loc);
6288   }
6289   RCG.emitAggregateType(CGF, N, Size);
6290   LValue SharedLVal;
6291   // If initializer uses initializer from declare reduction construct, emit a
6292   // pointer to the address of the original reduction item (reuired by reduction
6293   // initializer)
6294   if (RCG.usesReductionInitializer(N)) {
6295     Address SharedAddr =
6296         CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6297             CGF, CGM.getContext().VoidPtrTy,
6298             generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
6299     SharedAddr = CGF.EmitLoadOfPointer(
6300         SharedAddr,
6301         CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
6302     SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
6303   } else {
6304     SharedLVal = CGF.MakeNaturalAlignAddrLValue(
6305         llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
6306         CGM.getContext().VoidPtrTy);
6307   }
6308   // Emit the initializer:
6309   // %0 = bitcast void* %arg to <type>*
6310   // store <type> <init>, <type>* %0
6311   RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
6312                          [](CodeGenFunction &) { return false; });
6313   CGF.FinishFunction();
6314   return Fn;
6315 }
6316 
6317 /// Emits reduction combiner function:
6318 /// \code
6319 /// void @.red_comb(void* %arg0, void* %arg1) {
6320 /// %lhs = bitcast void* %arg0 to <type>*
6321 /// %rhs = bitcast void* %arg1 to <type>*
6322 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
6323 /// store <type> %2, <type>* %lhs
6324 /// ret void
6325 /// }
6326 /// \endcode
6327 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
6328                                            SourceLocation Loc,
6329                                            ReductionCodeGen &RCG, unsigned N,
6330                                            const Expr *ReductionOp,
6331                                            const Expr *LHS, const Expr *RHS,
6332                                            const Expr *PrivateRef) {
6333   ASTContext &C = CGM.getContext();
6334   const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
6335   const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
6336   FunctionArgList Args;
6337   ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
6338                                C.VoidPtrTy, ImplicitParamDecl::Other);
6339   ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6340                             ImplicitParamDecl::Other);
6341   Args.emplace_back(&ParamInOut);
6342   Args.emplace_back(&ParamIn);
6343   const auto &FnInfo =
6344       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6345   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6346   std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
6347   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6348                                     Name, &CGM.getModule());
6349   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6350   Fn->setDoesNotRecurse();
6351   CodeGenFunction CGF(CGM);
6352   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6353   llvm::Value *Size = nullptr;
6354   // If the size of the reduction item is non-constant, load it from global
6355   // threadprivate variable.
6356   if (RCG.getSizes(N).second) {
6357     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6358         CGF, CGM.getContext().getSizeType(),
6359         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6360     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6361                                 CGM.getContext().getSizeType(), Loc);
6362   }
6363   RCG.emitAggregateType(CGF, N, Size);
6364   // Remap lhs and rhs variables to the addresses of the function arguments.
6365   // %lhs = bitcast void* %arg0 to <type>*
6366   // %rhs = bitcast void* %arg1 to <type>*
6367   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6368   PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
6369     // Pull out the pointer to the variable.
6370     Address PtrAddr = CGF.EmitLoadOfPointer(
6371         CGF.GetAddrOfLocalVar(&ParamInOut),
6372         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6373     return CGF.Builder.CreateElementBitCast(
6374         PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
6375   });
6376   PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
6377     // Pull out the pointer to the variable.
6378     Address PtrAddr = CGF.EmitLoadOfPointer(
6379         CGF.GetAddrOfLocalVar(&ParamIn),
6380         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6381     return CGF.Builder.CreateElementBitCast(
6382         PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
6383   });
6384   PrivateScope.Privatize();
6385   // Emit the combiner body:
6386   // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
6387   // store <type> %2, <type>* %lhs
6388   CGM.getOpenMPRuntime().emitSingleReductionCombiner(
6389       CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
6390       cast<DeclRefExpr>(RHS));
6391   CGF.FinishFunction();
6392   return Fn;
6393 }
6394 
6395 /// Emits reduction finalizer function:
6396 /// \code
6397 /// void @.red_fini(void* %arg) {
6398 /// %0 = bitcast void* %arg to <type>*
6399 /// <destroy>(<type>* %0)
6400 /// ret void
6401 /// }
6402 /// \endcode
6403 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
6404                                            SourceLocation Loc,
6405                                            ReductionCodeGen &RCG, unsigned N) {
6406   if (!RCG.needCleanups(N))
6407     return nullptr;
6408   ASTContext &C = CGM.getContext();
6409   FunctionArgList Args;
6410   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6411                           ImplicitParamDecl::Other);
6412   Args.emplace_back(&Param);
6413   const auto &FnInfo =
6414       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6415   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6416   std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
6417   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6418                                     Name, &CGM.getModule());
6419   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6420   Fn->setDoesNotRecurse();
6421   CodeGenFunction CGF(CGM);
6422   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6423   Address PrivateAddr = CGF.EmitLoadOfPointer(
6424       CGF.GetAddrOfLocalVar(&Param),
6425       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6426   llvm::Value *Size = nullptr;
6427   // If the size of the reduction item is non-constant, load it from global
6428   // threadprivate variable.
6429   if (RCG.getSizes(N).second) {
6430     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6431         CGF, CGM.getContext().getSizeType(),
6432         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6433     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6434                                 CGM.getContext().getSizeType(), Loc);
6435   }
6436   RCG.emitAggregateType(CGF, N, Size);
6437   // Emit the finalizer body:
6438   // <destroy>(<type>* %0)
6439   RCG.emitCleanups(CGF, N, PrivateAddr);
6440   CGF.FinishFunction(Loc);
6441   return Fn;
6442 }
6443 
6444 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
6445     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
6446     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
6447   if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
6448     return nullptr;
6449 
6450   // Build typedef struct:
6451   // kmp_task_red_input {
6452   //   void *reduce_shar; // shared reduction item
6453   //   size_t reduce_size; // size of data item
6454   //   void *reduce_init; // data initialization routine
6455   //   void *reduce_fini; // data finalization routine
6456   //   void *reduce_comb; // data combiner routine
6457   //   kmp_task_red_flags_t flags; // flags for additional info from compiler
6458   // } kmp_task_red_input_t;
6459   ASTContext &C = CGM.getContext();
6460   RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t");
6461   RD->startDefinition();
6462   const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6463   const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
6464   const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6465   const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6466   const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6467   const FieldDecl *FlagsFD = addFieldToRecordDecl(
6468       C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
6469   RD->completeDefinition();
6470   QualType RDType = C.getRecordType(RD);
6471   unsigned Size = Data.ReductionVars.size();
6472   llvm::APInt ArraySize(/*numBits=*/64, Size);
6473   QualType ArrayRDType = C.getConstantArrayType(
6474       RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
6475   // kmp_task_red_input_t .rd_input.[Size];
6476   Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
6477   ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
6478                        Data.ReductionOps);
6479   for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
6480     // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
6481     llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
6482                            llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
6483     llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
6484         TaskRedInput.getPointer(), Idxs,
6485         /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
6486         ".rd_input.gep.");
6487     LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
6488     // ElemLVal.reduce_shar = &Shareds[Cnt];
6489     LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
6490     RCG.emitSharedLValue(CGF, Cnt);
6491     llvm::Value *CastedShared =
6492         CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
6493     CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
6494     RCG.emitAggregateType(CGF, Cnt);
6495     llvm::Value *SizeValInChars;
6496     llvm::Value *SizeVal;
6497     std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
6498     // We use delayed creation/initialization for VLAs, array sections and
6499     // custom reduction initializations. It is required because runtime does not
6500     // provide the way to pass the sizes of VLAs/array sections to
6501     // initializer/combiner/finalizer functions and does not pass the pointer to
6502     // original reduction item to the initializer. Instead threadprivate global
6503     // variables are used to store these values and use them in the functions.
6504     bool DelayedCreation = !!SizeVal;
6505     SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
6506                                                /*isSigned=*/false);
6507     LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
6508     CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
6509     // ElemLVal.reduce_init = init;
6510     LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
6511     llvm::Value *InitAddr =
6512         CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
6513     CGF.EmitStoreOfScalar(InitAddr, InitLVal);
6514     DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
6515     // ElemLVal.reduce_fini = fini;
6516     LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
6517     llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
6518     llvm::Value *FiniAddr = Fini
6519                                 ? CGF.EmitCastToVoidPtr(Fini)
6520                                 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
6521     CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
6522     // ElemLVal.reduce_comb = comb;
6523     LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
6524     llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
6525         CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
6526         RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
6527     CGF.EmitStoreOfScalar(CombAddr, CombLVal);
6528     // ElemLVal.flags = 0;
6529     LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
6530     if (DelayedCreation) {
6531       CGF.EmitStoreOfScalar(
6532           llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
6533           FlagsLVal);
6534     } else
6535       CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
6536                                  FlagsLVal.getType());
6537   }
6538   // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
6539   // *data);
6540   llvm::Value *Args[] = {
6541       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6542                                 /*isSigned=*/true),
6543       llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6544       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
6545                                                       CGM.VoidPtrTy)};
6546   return CGF.EmitRuntimeCall(
6547       createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
6548 }
6549 
6550 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6551                                               SourceLocation Loc,
6552                                               ReductionCodeGen &RCG,
6553                                               unsigned N) {
6554   auto Sizes = RCG.getSizes(N);
6555   // Emit threadprivate global variable if the type is non-constant
6556   // (Sizes.second = nullptr).
6557   if (Sizes.second) {
6558     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6559                                                      /*isSigned=*/false);
6560     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6561         CGF, CGM.getContext().getSizeType(),
6562         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6563     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6564   }
6565   // Store address of the original reduction item if custom initializer is used.
6566   if (RCG.usesReductionInitializer(N)) {
6567     Address SharedAddr = getAddrOfArtificialThreadPrivate(
6568         CGF, CGM.getContext().VoidPtrTy,
6569         generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
6570     CGF.Builder.CreateStore(
6571         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6572             RCG.getSharedLValue(N).getPointer(CGF), CGM.VoidPtrTy),
6573         SharedAddr, /*IsVolatile=*/false);
6574   }
6575 }
6576 
6577 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6578                                               SourceLocation Loc,
6579                                               llvm::Value *ReductionsPtr,
6580                                               LValue SharedLVal) {
6581   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6582   // *d);
6583   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6584                                                    CGM.IntTy,
6585                                                    /*isSigned=*/true),
6586                          ReductionsPtr,
6587                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6588                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6589   return Address(
6590       CGF.EmitRuntimeCall(
6591           createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
6592       SharedLVal.getAlignment());
6593 }
6594 
6595 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6596                                        SourceLocation Loc) {
6597   if (!CGF.HaveInsertPoint())
6598     return;
6599 
6600   llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
6601   if (OMPBuilder) {
6602     OMPBuilder->CreateTaskwait(CGF.Builder);
6603   } else {
6604     // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6605     // global_tid);
6606     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6607     // Ignore return result until untied tasks are supported.
6608     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
6609   }
6610 
6611   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6612     Region->emitUntiedSwitch(CGF);
6613 }
6614 
6615 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6616                                            OpenMPDirectiveKind InnerKind,
6617                                            const RegionCodeGenTy &CodeGen,
6618                                            bool HasCancel) {
6619   if (!CGF.HaveInsertPoint())
6620     return;
6621   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
6622   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6623 }
6624 
6625 namespace {
6626 enum RTCancelKind {
6627   CancelNoreq = 0,
6628   CancelParallel = 1,
6629   CancelLoop = 2,
6630   CancelSections = 3,
6631   CancelTaskgroup = 4
6632 };
6633 } // anonymous namespace
6634 
6635 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6636   RTCancelKind CancelKind = CancelNoreq;
6637   if (CancelRegion == OMPD_parallel)
6638     CancelKind = CancelParallel;
6639   else if (CancelRegion == OMPD_for)
6640     CancelKind = CancelLoop;
6641   else if (CancelRegion == OMPD_sections)
6642     CancelKind = CancelSections;
6643   else {
6644     assert(CancelRegion == OMPD_taskgroup);
6645     CancelKind = CancelTaskgroup;
6646   }
6647   return CancelKind;
6648 }
6649 
6650 void CGOpenMPRuntime::emitCancellationPointCall(
6651     CodeGenFunction &CGF, SourceLocation Loc,
6652     OpenMPDirectiveKind CancelRegion) {
6653   if (!CGF.HaveInsertPoint())
6654     return;
6655   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6656   // global_tid, kmp_int32 cncl_kind);
6657   if (auto *OMPRegionInfo =
6658           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6659     // For 'cancellation point taskgroup', the task region info may not have a
6660     // cancel. This may instead happen in another adjacent task.
6661     if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6662       llvm::Value *Args[] = {
6663           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6664           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6665       // Ignore return result until untied tasks are supported.
6666       llvm::Value *Result = CGF.EmitRuntimeCall(
6667           createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
6668       // if (__kmpc_cancellationpoint()) {
6669       //   exit from construct;
6670       // }
6671       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6672       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6673       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6674       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6675       CGF.EmitBlock(ExitBB);
6676       // exit from construct;
6677       CodeGenFunction::JumpDest CancelDest =
6678           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6679       CGF.EmitBranchThroughCleanup(CancelDest);
6680       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6681     }
6682   }
6683 }
6684 
6685 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6686                                      const Expr *IfCond,
6687                                      OpenMPDirectiveKind CancelRegion) {
6688   if (!CGF.HaveInsertPoint())
6689     return;
6690   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6691   // kmp_int32 cncl_kind);
6692   if (auto *OMPRegionInfo =
6693           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6694     auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
6695                                                         PrePostActionTy &) {
6696       CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6697       llvm::Value *Args[] = {
6698           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6699           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6700       // Ignore return result until untied tasks are supported.
6701       llvm::Value *Result = CGF.EmitRuntimeCall(
6702           RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
6703       // if (__kmpc_cancel()) {
6704       //   exit from construct;
6705       // }
6706       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6707       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6708       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6709       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6710       CGF.EmitBlock(ExitBB);
6711       // exit from construct;
6712       CodeGenFunction::JumpDest CancelDest =
6713           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6714       CGF.EmitBranchThroughCleanup(CancelDest);
6715       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6716     };
6717     if (IfCond) {
6718       emitIfClause(CGF, IfCond, ThenGen,
6719                    [](CodeGenFunction &, PrePostActionTy &) {});
6720     } else {
6721       RegionCodeGenTy ThenRCG(ThenGen);
6722       ThenRCG(CGF);
6723     }
6724   }
6725 }
6726 
6727 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6728     const OMPExecutableDirective &D, StringRef ParentName,
6729     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6730     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6731   assert(!ParentName.empty() && "Invalid target region parent name!");
6732   HasEmittedTargetRegion = true;
6733   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6734                                    IsOffloadEntry, CodeGen);
6735 }
6736 
6737 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6738     const OMPExecutableDirective &D, StringRef ParentName,
6739     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6740     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6741   // Create a unique name for the entry function using the source location
6742   // information of the current target region. The name will be something like:
6743   //
6744   // __omp_offloading_DD_FFFF_PP_lBB
6745   //
6746   // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
6747   // mangled name of the function that encloses the target region and BB is the
6748   // line number of the target region.
6749 
6750   unsigned DeviceID;
6751   unsigned FileID;
6752   unsigned Line;
6753   getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
6754                            Line);
6755   SmallString<64> EntryFnName;
6756   {
6757     llvm::raw_svector_ostream OS(EntryFnName);
6758     OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
6759        << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
6760   }
6761 
6762   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6763 
6764   CodeGenFunction CGF(CGM, true);
6765   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6766   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6767 
6768   OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
6769 
6770   // If this target outline function is not an offload entry, we don't need to
6771   // register it.
6772   if (!IsOffloadEntry)
6773     return;
6774 
6775   // The target region ID is used by the runtime library to identify the current
6776   // target region, so it only has to be unique and not necessarily point to
6777   // anything. It could be the pointer to the outlined function that implements
6778   // the target region, but we aren't using that so that the compiler doesn't
6779   // need to keep that, and could therefore inline the host function if proven
6780   // worthwhile during optimization. In the other hand, if emitting code for the
6781   // device, the ID has to be the function address so that it can retrieved from
6782   // the offloading entry and launched by the runtime library. We also mark the
6783   // outlined function to have external linkage in case we are emitting code for
6784   // the device, because these functions will be entry points to the device.
6785 
6786   if (CGM.getLangOpts().OpenMPIsDevice) {
6787     OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
6788     OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
6789     OutlinedFn->setDSOLocal(false);
6790   } else {
6791     std::string Name = getName({EntryFnName, "region_id"});
6792     OutlinedFnID = new llvm::GlobalVariable(
6793         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
6794         llvm::GlobalValue::WeakAnyLinkage,
6795         llvm::Constant::getNullValue(CGM.Int8Ty), Name);
6796   }
6797 
6798   // Register the information for the entry associated with this target region.
6799   OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
6800       DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
6801       OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
6802 }
6803 
6804 /// Checks if the expression is constant or does not have non-trivial function
6805 /// calls.
6806 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6807   // We can skip constant expressions.
6808   // We can skip expressions with trivial calls or simple expressions.
6809   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6810           !E->hasNonTrivialCall(Ctx)) &&
6811          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6812 }
6813 
6814 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6815                                                     const Stmt *Body) {
6816   const Stmt *Child = Body->IgnoreContainers();
6817   while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6818     Child = nullptr;
6819     for (const Stmt *S : C->body()) {
6820       if (const auto *E = dyn_cast<Expr>(S)) {
6821         if (isTrivial(Ctx, E))
6822           continue;
6823       }
6824       // Some of the statements can be ignored.
6825       if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6826           isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6827         continue;
6828       // Analyze declarations.
6829       if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6830         if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
6831               if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6832                   isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6833                   isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6834                   isa<UsingDirectiveDecl>(D) ||
6835                   isa<OMPDeclareReductionDecl>(D) ||
6836                   isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6837                 return true;
6838               const auto *VD = dyn_cast<VarDecl>(D);
6839               if (!VD)
6840                 return false;
6841               return VD->isConstexpr() ||
6842                      ((VD->getType().isTrivialType(Ctx) ||
6843                        VD->getType()->isReferenceType()) &&
6844                       (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
6845             }))
6846           continue;
6847       }
6848       // Found multiple children - cannot get the one child only.
6849       if (Child)
6850         return nullptr;
6851       Child = S;
6852     }
6853     if (Child)
6854       Child = Child->IgnoreContainers();
6855   }
6856   return Child;
6857 }
6858 
6859 /// Emit the number of teams for a target directive.  Inspect the num_teams
6860 /// clause associated with a teams construct combined or closely nested
6861 /// with the target directive.
6862 ///
6863 /// Emit a team of size one for directives such as 'target parallel' that
6864 /// have no associated teams construct.
6865 ///
6866 /// Otherwise, return nullptr.
6867 static llvm::Value *
6868 emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
6869                                const OMPExecutableDirective &D) {
6870   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6871          "Clauses associated with the teams directive expected to be emitted "
6872          "only for the host!");
6873   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6874   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6875          "Expected target-based executable directive.");
6876   CGBuilderTy &Bld = CGF.Builder;
6877   switch (DirectiveKind) {
6878   case OMPD_target: {
6879     const auto *CS = D.getInnermostCapturedStmt();
6880     const auto *Body =
6881         CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6882     const Stmt *ChildStmt =
6883         CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6884     if (const auto *NestedDir =
6885             dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6886       if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6887         if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6888           CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6889           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6890           const Expr *NumTeams =
6891               NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6892           llvm::Value *NumTeamsVal =
6893               CGF.EmitScalarExpr(NumTeams,
6894                                  /*IgnoreResultAssign*/ true);
6895           return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6896                                    /*isSigned=*/true);
6897         }
6898         return Bld.getInt32(0);
6899       }
6900       if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6901           isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
6902         return Bld.getInt32(1);
6903       return Bld.getInt32(0);
6904     }
6905     return nullptr;
6906   }
6907   case OMPD_target_teams:
6908   case OMPD_target_teams_distribute:
6909   case OMPD_target_teams_distribute_simd:
6910   case OMPD_target_teams_distribute_parallel_for:
6911   case OMPD_target_teams_distribute_parallel_for_simd: {
6912     if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6913       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6914       const Expr *NumTeams =
6915           D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6916       llvm::Value *NumTeamsVal =
6917           CGF.EmitScalarExpr(NumTeams,
6918                              /*IgnoreResultAssign*/ true);
6919       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6920                                /*isSigned=*/true);
6921     }
6922     return Bld.getInt32(0);
6923   }
6924   case OMPD_target_parallel:
6925   case OMPD_target_parallel_for:
6926   case OMPD_target_parallel_for_simd:
6927   case OMPD_target_simd:
6928     return Bld.getInt32(1);
6929   case OMPD_parallel:
6930   case OMPD_for:
6931   case OMPD_parallel_for:
6932   case OMPD_parallel_master:
6933   case OMPD_parallel_sections:
6934   case OMPD_for_simd:
6935   case OMPD_parallel_for_simd:
6936   case OMPD_cancel:
6937   case OMPD_cancellation_point:
6938   case OMPD_ordered:
6939   case OMPD_threadprivate:
6940   case OMPD_allocate:
6941   case OMPD_task:
6942   case OMPD_simd:
6943   case OMPD_sections:
6944   case OMPD_section:
6945   case OMPD_single:
6946   case OMPD_master:
6947   case OMPD_critical:
6948   case OMPD_taskyield:
6949   case OMPD_barrier:
6950   case OMPD_taskwait:
6951   case OMPD_taskgroup:
6952   case OMPD_atomic:
6953   case OMPD_flush:
6954   case OMPD_depobj:
6955   case OMPD_scan:
6956   case OMPD_teams:
6957   case OMPD_target_data:
6958   case OMPD_target_exit_data:
6959   case OMPD_target_enter_data:
6960   case OMPD_distribute:
6961   case OMPD_distribute_simd:
6962   case OMPD_distribute_parallel_for:
6963   case OMPD_distribute_parallel_for_simd:
6964   case OMPD_teams_distribute:
6965   case OMPD_teams_distribute_simd:
6966   case OMPD_teams_distribute_parallel_for:
6967   case OMPD_teams_distribute_parallel_for_simd:
6968   case OMPD_target_update:
6969   case OMPD_declare_simd:
6970   case OMPD_declare_variant:
6971   case OMPD_begin_declare_variant:
6972   case OMPD_end_declare_variant:
6973   case OMPD_declare_target:
6974   case OMPD_end_declare_target:
6975   case OMPD_declare_reduction:
6976   case OMPD_declare_mapper:
6977   case OMPD_taskloop:
6978   case OMPD_taskloop_simd:
6979   case OMPD_master_taskloop:
6980   case OMPD_master_taskloop_simd:
6981   case OMPD_parallel_master_taskloop:
6982   case OMPD_parallel_master_taskloop_simd:
6983   case OMPD_requires:
6984   case OMPD_unknown:
6985     break;
6986   }
6987   llvm_unreachable("Unexpected directive kind.");
6988 }
6989 
6990 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6991                                   llvm::Value *DefaultThreadLimitVal) {
6992   const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6993       CGF.getContext(), CS->getCapturedStmt());
6994   if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6995     if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6996       llvm::Value *NumThreads = nullptr;
6997       llvm::Value *CondVal = nullptr;
6998       // Handle if clause. If if clause present, the number of threads is
6999       // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
7000       if (Dir->hasClausesOfKind<OMPIfClause>()) {
7001         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
7002         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
7003         const OMPIfClause *IfClause = nullptr;
7004         for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
7005           if (C->getNameModifier() == OMPD_unknown ||
7006               C->getNameModifier() == OMPD_parallel) {
7007             IfClause = C;
7008             break;
7009           }
7010         }
7011         if (IfClause) {
7012           const Expr *Cond = IfClause->getCondition();
7013           bool Result;
7014           if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
7015             if (!Result)
7016               return CGF.Builder.getInt32(1);
7017           } else {
7018             CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
7019             if (const auto *PreInit =
7020                     cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
7021               for (const auto *I : PreInit->decls()) {
7022                 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
7023                   CGF.EmitVarDecl(cast<VarDecl>(*I));
7024                 } else {
7025                   CodeGenFunction::AutoVarEmission Emission =
7026                       CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
7027                   CGF.EmitAutoVarCleanups(Emission);
7028                 }
7029               }
7030             }
7031             CondVal = CGF.EvaluateExprAsBool(Cond);
7032           }
7033         }
7034       }
7035       // Check the value of num_threads clause iff if clause was not specified
7036       // or is not evaluated to false.
7037       if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
7038         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
7039         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
7040         const auto *NumThreadsClause =
7041             Dir->getSingleClause<OMPNumThreadsClause>();
7042         CodeGenFunction::LexicalScope Scope(
7043             CGF, NumThreadsClause->getNumThreads()->getSourceRange());
7044         if (const auto *PreInit =
7045                 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
7046           for (const auto *I : PreInit->decls()) {
7047             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
7048               CGF.EmitVarDecl(cast<VarDecl>(*I));
7049             } else {
7050               CodeGenFunction::AutoVarEmission Emission =
7051                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
7052               CGF.EmitAutoVarCleanups(Emission);
7053             }
7054           }
7055         }
7056         NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
7057         NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
7058                                                /*isSigned=*/false);
7059         if (DefaultThreadLimitVal)
7060           NumThreads = CGF.Builder.CreateSelect(
7061               CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
7062               DefaultThreadLimitVal, NumThreads);
7063       } else {
7064         NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
7065                                            : CGF.Builder.getInt32(0);
7066       }
7067       // Process condition of the if clause.
7068       if (CondVal) {
7069         NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
7070                                               CGF.Builder.getInt32(1));
7071       }
7072       return NumThreads;
7073     }
7074     if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
7075       return CGF.Builder.getInt32(1);
7076     return DefaultThreadLimitVal;
7077   }
7078   return DefaultThreadLimitVal ? DefaultThreadLimitVal
7079                                : CGF.Builder.getInt32(0);
7080 }
7081 
7082 /// Emit the number of threads for a target directive.  Inspect the
7083 /// thread_limit clause associated with a teams construct combined or closely
7084 /// nested with the target directive.
7085 ///
7086 /// Emit the num_threads clause for directives such as 'target parallel' that
7087 /// have no associated teams construct.
7088 ///
7089 /// Otherwise, return nullptr.
7090 static llvm::Value *
7091 emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
7092                                  const OMPExecutableDirective &D) {
7093   assert(!CGF.getLangOpts().OpenMPIsDevice &&
7094          "Clauses associated with the teams directive expected to be emitted "
7095          "only for the host!");
7096   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
7097   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
7098          "Expected target-based executable directive.");
7099   CGBuilderTy &Bld = CGF.Builder;
7100   llvm::Value *ThreadLimitVal = nullptr;
7101   llvm::Value *NumThreadsVal = nullptr;
7102   switch (DirectiveKind) {
7103   case OMPD_target: {
7104     const CapturedStmt *CS = D.getInnermostCapturedStmt();
7105     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7106       return NumThreads;
7107     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
7108         CGF.getContext(), CS->getCapturedStmt());
7109     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
7110       if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
7111         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
7112         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
7113         const auto *ThreadLimitClause =
7114             Dir->getSingleClause<OMPThreadLimitClause>();
7115         CodeGenFunction::LexicalScope Scope(
7116             CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
7117         if (const auto *PreInit =
7118                 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
7119           for (const auto *I : PreInit->decls()) {
7120             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
7121               CGF.EmitVarDecl(cast<VarDecl>(*I));
7122             } else {
7123               CodeGenFunction::AutoVarEmission Emission =
7124                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
7125               CGF.EmitAutoVarCleanups(Emission);
7126             }
7127           }
7128         }
7129         llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7130             ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7131         ThreadLimitVal =
7132             Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7133       }
7134       if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
7135           !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
7136         CS = Dir->getInnermostCapturedStmt();
7137         const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
7138             CGF.getContext(), CS->getCapturedStmt());
7139         Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
7140       }
7141       if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
7142           !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
7143         CS = Dir->getInnermostCapturedStmt();
7144         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7145           return NumThreads;
7146       }
7147       if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
7148         return Bld.getInt32(1);
7149     }
7150     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
7151   }
7152   case OMPD_target_teams: {
7153     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7154       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7155       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7156       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7157           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7158       ThreadLimitVal =
7159           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7160     }
7161     const CapturedStmt *CS = D.getInnermostCapturedStmt();
7162     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7163       return NumThreads;
7164     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
7165         CGF.getContext(), CS->getCapturedStmt());
7166     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
7167       if (Dir->getDirectiveKind() == OMPD_distribute) {
7168         CS = Dir->getInnermostCapturedStmt();
7169         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7170           return NumThreads;
7171       }
7172     }
7173     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
7174   }
7175   case OMPD_target_teams_distribute:
7176     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7177       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7178       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7179       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7180           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7181       ThreadLimitVal =
7182           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7183     }
7184     return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
7185   case OMPD_target_parallel:
7186   case OMPD_target_parallel_for:
7187   case OMPD_target_parallel_for_simd:
7188   case OMPD_target_teams_distribute_parallel_for:
7189   case OMPD_target_teams_distribute_parallel_for_simd: {
7190     llvm::Value *CondVal = nullptr;
7191     // Handle if clause. If if clause present, the number of threads is
7192     // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
7193     if (D.hasClausesOfKind<OMPIfClause>()) {
7194       const OMPIfClause *IfClause = nullptr;
7195       for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
7196         if (C->getNameModifier() == OMPD_unknown ||
7197             C->getNameModifier() == OMPD_parallel) {
7198           IfClause = C;
7199           break;
7200         }
7201       }
7202       if (IfClause) {
7203         const Expr *Cond = IfClause->getCondition();
7204         bool Result;
7205         if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
7206           if (!Result)
7207             return Bld.getInt32(1);
7208         } else {
7209           CodeGenFunction::RunCleanupsScope Scope(CGF);
7210           CondVal = CGF.EvaluateExprAsBool(Cond);
7211         }
7212       }
7213     }
7214     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7215       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7216       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7217       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7218           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7219       ThreadLimitVal =
7220           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7221     }
7222     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
7223       CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
7224       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
7225       llvm::Value *NumThreads = CGF.EmitScalarExpr(
7226           NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
7227       NumThreadsVal =
7228           Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
7229       ThreadLimitVal = ThreadLimitVal
7230                            ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
7231                                                                 ThreadLimitVal),
7232                                               NumThreadsVal, ThreadLimitVal)
7233                            : NumThreadsVal;
7234     }
7235     if (!ThreadLimitVal)
7236       ThreadLimitVal = Bld.getInt32(0);
7237     if (CondVal)
7238       return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
7239     return ThreadLimitVal;
7240   }
7241   case OMPD_target_teams_distribute_simd:
7242   case OMPD_target_simd:
7243     return Bld.getInt32(1);
7244   case OMPD_parallel:
7245   case OMPD_for:
7246   case OMPD_parallel_for:
7247   case OMPD_parallel_master:
7248   case OMPD_parallel_sections:
7249   case OMPD_for_simd:
7250   case OMPD_parallel_for_simd:
7251   case OMPD_cancel:
7252   case OMPD_cancellation_point:
7253   case OMPD_ordered:
7254   case OMPD_threadprivate:
7255   case OMPD_allocate:
7256   case OMPD_task:
7257   case OMPD_simd:
7258   case OMPD_sections:
7259   case OMPD_section:
7260   case OMPD_single:
7261   case OMPD_master:
7262   case OMPD_critical:
7263   case OMPD_taskyield:
7264   case OMPD_barrier:
7265   case OMPD_taskwait:
7266   case OMPD_taskgroup:
7267   case OMPD_atomic:
7268   case OMPD_flush:
7269   case OMPD_depobj:
7270   case OMPD_scan:
7271   case OMPD_teams:
7272   case OMPD_target_data:
7273   case OMPD_target_exit_data:
7274   case OMPD_target_enter_data:
7275   case OMPD_distribute:
7276   case OMPD_distribute_simd:
7277   case OMPD_distribute_parallel_for:
7278   case OMPD_distribute_parallel_for_simd:
7279   case OMPD_teams_distribute:
7280   case OMPD_teams_distribute_simd:
7281   case OMPD_teams_distribute_parallel_for:
7282   case OMPD_teams_distribute_parallel_for_simd:
7283   case OMPD_target_update:
7284   case OMPD_declare_simd:
7285   case OMPD_declare_variant:
7286   case OMPD_begin_declare_variant:
7287   case OMPD_end_declare_variant:
7288   case OMPD_declare_target:
7289   case OMPD_end_declare_target:
7290   case OMPD_declare_reduction:
7291   case OMPD_declare_mapper:
7292   case OMPD_taskloop:
7293   case OMPD_taskloop_simd:
7294   case OMPD_master_taskloop:
7295   case OMPD_master_taskloop_simd:
7296   case OMPD_parallel_master_taskloop:
7297   case OMPD_parallel_master_taskloop_simd:
7298   case OMPD_requires:
7299   case OMPD_unknown:
7300     break;
7301   }
7302   llvm_unreachable("Unsupported directive kind.");
7303 }
7304 
7305 namespace {
7306 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7307 
7308 // Utility to handle information from clauses associated with a given
7309 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7310 // It provides a convenient interface to obtain the information and generate
7311 // code for that information.
7312 class MappableExprsHandler {
7313 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading. The low bits are independent single-bit flags that are
  /// combined with bitwise operators (see getMapTypeBits); the 16 most
  /// significant bits form the MEMBER_OF field.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class. getFlagMemberOffset() derives the bit offset of this
    /// field from the mask value.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    /// Registers OMP_MAP_MEMBER_OF as the largest flag of this bitmask enum.
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7354 
7355   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7356   static unsigned getFlagMemberOffset() {
7357     unsigned Offset = 0;
7358     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7359          Remain = Remain >> 1)
7360       Offset++;
7361     return Offset;
7362   }
7363 
7364   /// Class that associates information with a base pointer to be passed to the
7365   /// runtime library.
7366   class BasePointerInfo {
7367     /// The base pointer.
7368     llvm::Value *Ptr = nullptr;
7369     /// The base declaration that refers to this device pointer, or null if
7370     /// there is none.
7371     const ValueDecl *DevPtrDecl = nullptr;
7372 
7373   public:
7374     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7375         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7376     llvm::Value *operator*() const { return Ptr; }
7377     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7378     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7379   };
7380 
  /// List of base pointers (each optionally tagged with a device pointer
  /// declaration) collected for the runtime library.
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  /// List of IR values; generateInfoForComponentList uses this type for both
  /// the section pointers and the sizes.
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  /// List of offloading map flags, one per generated entry.
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
7384 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Lowest mapped element as (field index, address). The address starts
    /// out invalid.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Highest mapped element as (field index, address). The address starts
    /// out invalid.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Base address recorded for the struct; invalid until a base has been
    /// set (generateInfoForComponentList asserts on this before setting it).
    Address Base = Address::invalid();
  };
7396 
7397 private:
  /// Information recorded for one component list of a mappable expression:
  /// the components themselves, the map type and modifiers to apply, whether
  /// the device pointer has to be returned, and whether the map is implicit.
  struct MapInfo {
    /// Components of the mappable expression.
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    /// Map type requested for the components; unknown by default.
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    /// Map modifiers attached to the map. This is a non-owning view, so the
    /// referenced storage must outlive this object.
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    /// True if the device pointer has to be returned for this entry.
    bool ReturnDevicePointer = false;
    /// True if the map is implicit.
    bool IsImplicit = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
  };
7415 
  /// If use_device_ptr is used on a pointer which is a struct member and there
  /// is no map information about it, then emission of that entry is deferred
  /// until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    // NOTE(review): presumably the expression naming the struct member used in
    // the use_device_ptr clause - confirm where these entries are created.
    const Expr *IE = nullptr;
    // NOTE(review): presumably the declaration associated with that
    // expression - confirm where these entries are created.
    const ValueDecl *VD = nullptr;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
        : IE(IE), VD(VD) {}
  };
7426 
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all firstprivate variables in the current directive.
  /// The bool value is true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;
7447 
7448   llvm::Value *getExprTypeSize(const Expr *E) const {
7449     QualType ExprTy = E->getType().getCanonicalType();
7450 
7451     // Calculate the size for array shaping expression.
7452     if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7453       llvm::Value *Size =
7454           CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7455       for (const Expr *SE : OAE->getDimensions()) {
7456         llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7457         Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7458                                       CGF.getContext().getSizeType(),
7459                                       SE->getExprLoc());
7460         Size = CGF.Builder.CreateNUWMul(Size, Sz);
7461       }
7462       return Size;
7463     }
7464 
7465     // Reference types are ignored for mapping purposes.
7466     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7467       ExprTy = RefTy->getPointeeType().getCanonicalType();
7468 
7469     // Given that an array section is considered a built-in type, we need to
7470     // do the calculation based on the length of the section instead of relying
7471     // on CGF.getTypeSize(E->getType()).
7472     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7473       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7474                             OAE->getBase()->IgnoreParenImpCasts())
7475                             .getCanonicalType();
7476 
7477       // If there is no length associated with the expression and lower bound is
7478       // not specified too, that means we are using the whole length of the
7479       // base.
7480       if (!OAE->getLength() && OAE->getColonLoc().isValid() &&
7481           !OAE->getLowerBound())
7482         return CGF.getTypeSize(BaseTy);
7483 
7484       llvm::Value *ElemSize;
7485       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7486         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7487       } else {
7488         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7489         assert(ATy && "Expecting array type if not a pointer type.");
7490         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7491       }
7492 
7493       // If we don't have a length at this point, that is because we have an
7494       // array section with a single element.
7495       if (!OAE->getLength() && OAE->getColonLoc().isInvalid())
7496         return ElemSize;
7497 
7498       if (const Expr *LenExpr = OAE->getLength()) {
7499         llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7500         LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7501                                              CGF.getContext().getSizeType(),
7502                                              LenExpr->getExprLoc());
7503         return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7504       }
7505       assert(!OAE->getLength() && OAE->getColonLoc().isValid() &&
7506              OAE->getLowerBound() && "expected array_section[lb:].");
7507       // Size = sizetype - lb * elemtype;
7508       llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7509       llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7510       LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7511                                        CGF.getContext().getSizeType(),
7512                                        OAE->getLowerBound()->getExprLoc());
7513       LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7514       llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7515       llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7516       LengthVal = CGF.Builder.CreateSelect(
7517           Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7518       return LengthVal;
7519     }
7520     return CGF.getTypeSize(ExprTy);
7521   }
7522 
7523   /// Return the corresponding bits for a given map clause modifier. Add
7524   /// a flag marking the map as a pointer if requested. Add a flag marking the
7525   /// map as the first one of a series of maps that relate to the same map
7526   /// expression.
7527   OpenMPOffloadMappingFlags getMapTypeBits(
7528       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7529       bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
7530     OpenMPOffloadMappingFlags Bits =
7531         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7532     switch (MapType) {
7533     case OMPC_MAP_alloc:
7534     case OMPC_MAP_release:
7535       // alloc and release is the default behavior in the runtime library,  i.e.
7536       // if we don't pass any bits alloc/release that is what the runtime is
7537       // going to do. Therefore, we don't need to signal anything for these two
7538       // type modifiers.
7539       break;
7540     case OMPC_MAP_to:
7541       Bits |= OMP_MAP_TO;
7542       break;
7543     case OMPC_MAP_from:
7544       Bits |= OMP_MAP_FROM;
7545       break;
7546     case OMPC_MAP_tofrom:
7547       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7548       break;
7549     case OMPC_MAP_delete:
7550       Bits |= OMP_MAP_DELETE;
7551       break;
7552     case OMPC_MAP_unknown:
7553       llvm_unreachable("Unexpected map type!");
7554     }
7555     if (AddPtrFlag)
7556       Bits |= OMP_MAP_PTR_AND_OBJ;
7557     if (AddIsTargetParamFlag)
7558       Bits |= OMP_MAP_TARGET_PARAM;
7559     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7560         != MapModifiers.end())
7561       Bits |= OMP_MAP_ALWAYS;
7562     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7563         != MapModifiers.end())
7564       Bits |= OMP_MAP_CLOSE;
7565     return Bits;
7566   }
7567 
7568   /// Return true if the provided expression is a final array section. A
7569   /// final array section, is one whose length can't be proved to be one.
7570   bool isFinalArraySectionExpression(const Expr *E) const {
7571     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7572 
7573     // It is not an array section and therefore not a unity-size one.
7574     if (!OASE)
7575       return false;
7576 
7577     // An array section with no colon always refer to a single element.
7578     if (OASE->getColonLoc().isInvalid())
7579       return false;
7580 
7581     const Expr *Length = OASE->getLength();
7582 
7583     // If we don't have a length we have to check if the array has size 1
7584     // for this dimension. Also, we should always expect a length if the
7585     // base type is pointer.
7586     if (!Length) {
7587       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7588                              OASE->getBase()->IgnoreParenImpCasts())
7589                              .getCanonicalType();
7590       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7591         return ATy->getSize().getSExtValue() != 1;
7592       // If we don't have a constant dimension length, we have to consider
7593       // the current section as having any size, so it is not necessarily
7594       // unitary. If it happen to be unity size, that's user fault.
7595       return true;
7596     }
7597 
7598     // Check if the length evaluates to 1.
7599     Expr::EvalResult Result;
7600     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7601       return true; // Can have more that size 1.
7602 
7603     llvm::APSInt ConstLength = Result.Val.getInt();
7604     return ConstLength.getSExtValue() != 1;
7605   }
7606 
7607   /// Generate the base pointers, section pointers, sizes and map type
7608   /// bits for the provided map type, map modifier, and expression components.
  /// \a IsFirstComponentList should be set to true if the provided set of
  /// components is the first associated with a capture.
7611   void generateInfoForComponentList(
7612       OpenMPMapClauseKind MapType,
7613       ArrayRef<OpenMPMapModifierKind> MapModifiers,
7614       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7615       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
7616       MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
7617       StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
7618       bool IsImplicit,
7619       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7620           OverlappedElements = llvm::None) const {
7621     // The following summarizes what has to be generated for each map and the
7622     // types below. The generated information is expressed in this order:
7623     // base pointer, section pointer, size, flags
7624     // (to add to the ones that come from the map type and modifier).
7625     //
7626     // double d;
7627     // int i[100];
7628     // float *p;
7629     //
7630     // struct S1 {
7631     //   int i;
7632     //   float f[50];
7633     // }
7634     // struct S2 {
7635     //   int i;
7636     //   float f[50];
7637     //   S1 s;
7638     //   double *p;
7639     //   struct S2 *ps;
7640     // }
7641     // S2 s;
7642     // S2 *ps;
7643     //
7644     // map(d)
7645     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7646     //
7647     // map(i)
7648     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7649     //
7650     // map(i[1:23])
7651     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7652     //
7653     // map(p)
7654     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7655     //
7656     // map(p[1:24])
7657     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7658     //
7659     // map(s)
7660     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7661     //
7662     // map(s.i)
7663     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7664     //
7665     // map(s.s.f)
7666     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7667     //
7668     // map(s.p)
7669     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7670     //
7671     // map(to: s.p[:22])
7672     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7673     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7674     // &(s.p), &(s.p[0]), 22*sizeof(double),
7675     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7676     // (*) alloc space for struct members, only this is a target parameter
7677     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7678     //      optimizes this entry out, same in the examples below)
7679     // (***) map the pointee (map: to)
7680     //
7681     // map(s.ps)
7682     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7683     //
7684     // map(from: s.ps->s.i)
7685     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7686     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7687     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7688     //
7689     // map(to: s.ps->ps)
7690     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7691     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7692     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7693     //
7694     // map(s.ps->ps->ps)
7695     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7696     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7697     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7698     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7699     //
7700     // map(to: s.ps->ps->s.f[:22])
7701     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7702     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7703     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7704     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7705     //
7706     // map(ps)
7707     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7708     //
7709     // map(ps->i)
7710     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7711     //
7712     // map(ps->s.f)
7713     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7714     //
7715     // map(from: ps->p)
7716     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7717     //
7718     // map(to: ps->p[:22])
7719     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7720     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7721     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7722     //
7723     // map(ps->ps)
7724     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7725     //
7726     // map(from: ps->ps->s.i)
7727     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7728     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7729     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7730     //
7731     // map(from: ps->ps->ps)
7732     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7733     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7734     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7735     //
7736     // map(ps->ps->ps->ps)
7737     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7738     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7739     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7740     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7741     //
7742     // map(to: ps->ps->ps->s.f[:22])
7743     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7744     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7745     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7746     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7747     //
7748     // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (*), TARGET_PARAM
7751     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7752     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7753     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7754     // (*) allocate contiguous space needed to fit all mapped members even if
7755     //     we allocate space for members not mapped (in this example,
7756     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7757     //     them as well because they fall between &s.f[0] and &s.p)
7758     //
7759     // map(from: s.f[:22]) map(to: ps->p[:33])
7760     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7761     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7762     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7763     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7764     // (*) the struct this entry pertains to is the 2nd element in the list of
7765     //     arguments, hence MEMBER_OF(2)
7766     //
7767     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7768     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7769     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7770     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7771     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7772     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7773     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7774     // (*) the struct this entry pertains to is the 4th element in the list
7775     //     of arguments, hence MEMBER_OF(4)
7776 
7777     // Track if the map information being generated is the first for a capture.
7778     bool IsCaptureFirstInfo = IsFirstComponentList;
7779     // When the variable is on a declare target link or in a to clause with
7780     // unified memory, a reference is needed to hold the host/device address
7781     // of the variable.
7782     bool RequiresReference = false;
7783 
7784     // Scan the components from the base to the complete expression.
7785     auto CI = Components.rbegin();
7786     auto CE = Components.rend();
7787     auto I = CI;
7788 
7789     // Track if the map information being generated is the first for a list of
7790     // components.
7791     bool IsExpressionFirstInfo = true;
7792     Address BP = Address::invalid();
7793     const Expr *AssocExpr = I->getAssociatedExpression();
7794     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7795     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7796     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7797 
7798     if (isa<MemberExpr>(AssocExpr)) {
7799       // The base is the 'this' pointer. The content of the pointer is going
7800       // to be the base of the field being mapped.
7801       BP = CGF.LoadCXXThisAddress();
7802     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7803                (OASE &&
7804                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7805       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7806     } else if (OAShE &&
7807                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7808       BP = Address(
7809           CGF.EmitScalarExpr(OAShE->getBase()),
7810           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7811     } else {
7812       // The base is the reference to the variable.
7813       // BP = &Var.
7814       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7815       if (const auto *VD =
7816               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7817         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7818                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7819           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7820               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7821                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7822             RequiresReference = true;
7823             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7824           }
7825         }
7826       }
7827 
7828       // If the variable is a pointer and is being dereferenced (i.e. is not
7829       // the last component), the base has to be the pointer itself, not its
7830       // reference. References are ignored for mapping purposes.
7831       QualType Ty =
7832           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7833       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7834         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7835 
7836         // We do not need to generate individual map information for the
7837         // pointer, it can be associated with the combined storage.
7838         ++I;
7839       }
7840     }
7841 
7842     // Track whether a component of the list should be marked as MEMBER_OF some
7843     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7844     // in a component list should be marked as MEMBER_OF, all subsequent entries
7845     // do not belong to the base struct. E.g.
7846     // struct S2 s;
7847     // s.ps->ps->ps->f[:]
7848     //   (1) (2) (3) (4)
7849     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7850     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7851     // is the pointee of ps(2) which is not member of struct s, so it should not
7852     // be marked as such (it is still PTR_AND_OBJ).
7853     // The variable is initialized to false so that PTR_AND_OBJ entries which
7854     // are not struct members are not considered (e.g. array of pointers to
7855     // data).
7856     bool ShouldBeMemberOf = false;
7857 
7858     // Variable keeping track of whether or not we have encountered a component
7859     // in the component list which is a member expression. Useful when we have a
7860     // pointer or a final array section, in which case it is the previous
7861     // component in the list which tells us whether we have a member expression.
7862     // E.g. X.f[:]
7863     // While processing the final array section "[:]" it is "f" which tells us
7864     // whether we are dealing with a member of a declared struct.
7865     const MemberExpr *EncounteredME = nullptr;
7866 
7867     for (; I != CE; ++I) {
7868       // If the current component is member of a struct (parent struct) mark it.
7869       if (!EncounteredME) {
7870         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7871         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7872         // as MEMBER_OF the parent struct.
7873         if (EncounteredME)
7874           ShouldBeMemberOf = true;
7875       }
7876 
7877       auto Next = std::next(I);
7878 
7879       // We need to generate the addresses and sizes if this is the last
7880       // component, if the component is a pointer or if it is an array section
7881       // whose length can't be proved to be one. If this is a pointer, it
7882       // becomes the base address for the following components.
7883 
7884       // A final array section, is one whose length can't be proved to be one.
7885       bool IsFinalArraySection =
7886           isFinalArraySectionExpression(I->getAssociatedExpression());
7887 
7888       // Get information on whether the element is a pointer. Have to do a
7889       // special treatment for array sections given that they are built-in
7890       // types.
7891       const auto *OASE =
7892           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7893       const auto *OAShE =
7894           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7895       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7896       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7897       bool IsPointer =
7898           OAShE ||
7899           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7900                        .getCanonicalType()
7901                        ->isAnyPointerType()) ||
7902           I->getAssociatedExpression()->getType()->isAnyPointerType();
7903       bool IsNonDerefPointer = IsPointer && !UO && !BO;
7904 
7905       if (Next == CE || IsNonDerefPointer || IsFinalArraySection) {
7906         // If this is not the last component, we expect the pointer to be
7907         // associated with an array expression or member expression.
7908         assert((Next == CE ||
7909                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7910                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7911                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7912                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7913                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7914                "Unexpected expression");
7915 
7916         Address LB = Address::invalid();
7917         if (OAShE) {
7918           LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
7919                        CGF.getContext().getTypeAlignInChars(
7920                            OAShE->getBase()->getType()));
7921         } else {
7922           LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7923                    .getAddress(CGF);
7924         }
7925 
7926         // If this component is a pointer inside the base struct then we don't
7927         // need to create any entry for it - it will be combined with the object
7928         // it is pointing to into a single PTR_AND_OBJ entry.
7929         bool IsMemberPointer =
7930             IsPointer && EncounteredME &&
7931             (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
7932              EncounteredME);
7933         if (!OverlappedElements.empty()) {
7934           // Handle base element with the info for overlapped elements.
7935           assert(!PartialStruct.Base.isValid() && "The base element is set.");
7936           assert(Next == CE &&
7937                  "Expected last element for the overlapped elements.");
7938           assert(!IsPointer &&
7939                  "Unexpected base element with the pointer type.");
7940           // Mark the whole struct as the struct that requires allocation on the
7941           // device.
7942           PartialStruct.LowestElem = {0, LB};
7943           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7944               I->getAssociatedExpression()->getType());
7945           Address HB = CGF.Builder.CreateConstGEP(
7946               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
7947                                                               CGF.VoidPtrTy),
7948               TypeSize.getQuantity() - 1);
7949           PartialStruct.HighestElem = {
7950               std::numeric_limits<decltype(
7951                   PartialStruct.HighestElem.first)>::max(),
7952               HB};
7953           PartialStruct.Base = BP;
7954           // Emit data for non-overlapped data.
7955           OpenMPOffloadMappingFlags Flags =
7956               OMP_MAP_MEMBER_OF |
7957               getMapTypeBits(MapType, MapModifiers, IsImplicit,
7958                              /*AddPtrFlag=*/false,
7959                              /*AddIsTargetParamFlag=*/false);
7960           LB = BP;
7961           llvm::Value *Size = nullptr;
7962           // Do bitcopy of all non-overlapped structure elements.
7963           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7964                    Component : OverlappedElements) {
7965             Address ComponentLB = Address::invalid();
7966             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7967                  Component) {
7968               if (MC.getAssociatedDeclaration()) {
7969                 ComponentLB =
7970                     CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7971                         .getAddress(CGF);
7972                 Size = CGF.Builder.CreatePtrDiff(
7973                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
7974                     CGF.EmitCastToVoidPtr(LB.getPointer()));
7975                 break;
7976               }
7977             }
7978             BasePointers.push_back(BP.getPointer());
7979             Pointers.push_back(LB.getPointer());
7980             Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty,
7981                                                       /*isSigned=*/true));
7982             Types.push_back(Flags);
7983             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7984           }
7985           BasePointers.push_back(BP.getPointer());
7986           Pointers.push_back(LB.getPointer());
7987           Size = CGF.Builder.CreatePtrDiff(
7988               CGF.EmitCastToVoidPtr(
7989                   CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
7990               CGF.EmitCastToVoidPtr(LB.getPointer()));
7991           Sizes.push_back(
7992               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7993           Types.push_back(Flags);
7994           break;
7995         }
7996         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7997         if (!IsMemberPointer) {
7998           BasePointers.push_back(BP.getPointer());
7999           Pointers.push_back(LB.getPointer());
8000           Sizes.push_back(
8001               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8002 
8003           // We need to add a pointer flag for each map that comes from the
8004           // same expression except for the first one. We also need to signal
8005           // this map is the first one that relates with the current capture
8006           // (there is a set of entries for each capture).
8007           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
8008               MapType, MapModifiers, IsImplicit,
8009               !IsExpressionFirstInfo || RequiresReference,
8010               IsCaptureFirstInfo && !RequiresReference);
8011 
8012           if (!IsExpressionFirstInfo) {
8013             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
8014             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
8015             if (IsPointer)
8016               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
8017                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
8018 
8019             if (ShouldBeMemberOf) {
8020               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
8021               // should be later updated with the correct value of MEMBER_OF.
8022               Flags |= OMP_MAP_MEMBER_OF;
8023               // From now on, all subsequent PTR_AND_OBJ entries should not be
8024               // marked as MEMBER_OF.
8025               ShouldBeMemberOf = false;
8026             }
8027           }
8028 
8029           Types.push_back(Flags);
8030         }
8031 
8032         // If we have encountered a member expression so far, keep track of the
8033         // mapped member. If the parent is "*this", then the value declaration
8034         // is nullptr.
8035         if (EncounteredME) {
8036           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
8037           unsigned FieldIndex = FD->getFieldIndex();
8038 
8039           // Update info about the lowest and highest elements for this struct
8040           if (!PartialStruct.Base.isValid()) {
8041             PartialStruct.LowestElem = {FieldIndex, LB};
8042             PartialStruct.HighestElem = {FieldIndex, LB};
8043             PartialStruct.Base = BP;
8044           } else if (FieldIndex < PartialStruct.LowestElem.first) {
8045             PartialStruct.LowestElem = {FieldIndex, LB};
8046           } else if (FieldIndex > PartialStruct.HighestElem.first) {
8047             PartialStruct.HighestElem = {FieldIndex, LB};
8048           }
8049         }
8050 
8051         // If we have a final array section, we are done with this expression.
8052         if (IsFinalArraySection)
8053           break;
8054 
8055         // The pointer becomes the base for the next element.
8056         if (Next != CE)
8057           BP = LB;
8058 
8059         IsExpressionFirstInfo = false;
8060         IsCaptureFirstInfo = false;
8061       }
8062     }
8063   }
8064 
8065   /// Return the adjusted map modifiers if the declaration a capture refers to
8066   /// appears in a first-private clause. This is expected to be used only with
8067   /// directives that start with 'target'.
8068   MappableExprsHandler::OpenMPOffloadMappingFlags
8069   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8070     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8071 
8072     // A first private variable captured by reference will use only the
8073     // 'private ptr' and 'map to' flag. Return the right flags if the captured
8074     // declaration is known as first-private in this handler.
8075     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8076       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
8077           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
8078         return MappableExprsHandler::OMP_MAP_ALWAYS |
8079                MappableExprsHandler::OMP_MAP_TO;
8080       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8081         return MappableExprsHandler::OMP_MAP_TO |
8082                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8083       return MappableExprsHandler::OMP_MAP_PRIVATE |
8084              MappableExprsHandler::OMP_MAP_TO;
8085     }
8086     return MappableExprsHandler::OMP_MAP_TO |
8087            MappableExprsHandler::OMP_MAP_FROM;
8088   }
8089 
8090   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8091     // Rotate by getFlagMemberOffset() bits.
8092     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8093                                                   << getFlagMemberOffset());
8094   }
8095 
8096   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8097                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8098     // If the entry is PTR_AND_OBJ but has not been marked with the special
8099     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8100     // marked as MEMBER_OF.
8101     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8102         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8103       return;
8104 
8105     // Reset the placeholder value to prepare the flag for the assignment of the
8106     // proper MEMBER_OF value.
8107     Flags &= ~OMP_MAP_MEMBER_OF;
8108     Flags |= MemberOfFlag;
8109   }
8110 
8111   void getPlainLayout(const CXXRecordDecl *RD,
8112                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8113                       bool AsBase) const {
8114     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8115 
8116     llvm::StructType *St =
8117         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8118 
8119     unsigned NumElements = St->getNumElements();
8120     llvm::SmallVector<
8121         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8122         RecordLayout(NumElements);
8123 
8124     // Fill bases.
8125     for (const auto &I : RD->bases()) {
8126       if (I.isVirtual())
8127         continue;
8128       const auto *Base = I.getType()->getAsCXXRecordDecl();
8129       // Ignore empty bases.
8130       if (Base->isEmpty() || CGF.getContext()
8131                                  .getASTRecordLayout(Base)
8132                                  .getNonVirtualSize()
8133                                  .isZero())
8134         continue;
8135 
8136       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8137       RecordLayout[FieldIndex] = Base;
8138     }
8139     // Fill in virtual bases.
8140     for (const auto &I : RD->vbases()) {
8141       const auto *Base = I.getType()->getAsCXXRecordDecl();
8142       // Ignore empty bases.
8143       if (Base->isEmpty())
8144         continue;
8145       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8146       if (RecordLayout[FieldIndex])
8147         continue;
8148       RecordLayout[FieldIndex] = Base;
8149     }
8150     // Fill in all the fields.
8151     assert(!RD->isUnion() && "Unexpected union.");
8152     for (const auto *Field : RD->fields()) {
8153       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8154       // will fill in later.)
8155       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
8156         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8157         RecordLayout[FieldIndex] = Field;
8158       }
8159     }
8160     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8161              &Data : RecordLayout) {
8162       if (Data.isNull())
8163         continue;
8164       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
8165         getPlainLayout(Base, Layout, /*AsBase=*/true);
8166       else
8167         Layout.push_back(Data.get<const FieldDecl *>());
8168     }
8169   }
8170 
8171 public:
8172   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8173       : CurDir(&Dir), CGF(CGF) {
8174     // Extract firstprivate clause information.
8175     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8176       for (const auto *D : C->varlists())
8177         FirstPrivateDecls.try_emplace(
8178             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8179     // Extract device pointer clause information.
8180     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8181       for (auto L : C->component_lists())
8182         DevPointersMap[L.first].push_back(L.second);
8183   }
8184 
8185   /// Constructor for the declare mapper directive.
8186   MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
8187       : CurDir(&Dir), CGF(CGF) {}
8188 
8189   /// Generate code for the combined entry if we have a partially mapped struct
8190   /// and take care of the mapping flags of the arguments corresponding to
8191   /// individual struct members.
8192   void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
8193                          MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
8194                          MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
8195                          const StructRangeInfoTy &PartialStruct) const {
8196     // Base is the base of the struct
8197     BasePointers.push_back(PartialStruct.Base.getPointer());
8198     // Pointer is the address of the lowest element
8199     llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
8200     Pointers.push_back(LB);
8201     // Size is (addr of {highest+1} element) - (addr of lowest element)
8202     llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
8203     llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
8204     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8205     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8206     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
8207     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8208                                                   /*isSigned=*/false);
8209     Sizes.push_back(Size);
8210     // Map type is always TARGET_PARAM
8211     Types.push_back(OMP_MAP_TARGET_PARAM);
8212     // Remove TARGET_PARAM flag from the first element
8213     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
8214 
8215     // All other current entries will be MEMBER_OF the combined entry
8216     // (except for PTR_AND_OBJ entries which do not have a placeholder value
8217     // 0xFFFF in the MEMBER_OF field).
8218     OpenMPOffloadMappingFlags MemberOfFlag =
8219         getMemberOfFlag(BasePointers.size() - 1);
8220     for (auto &M : CurTypes)
8221       setCorrectMemberOfFlag(M, MemberOfFlag);
8222   }
8223 
8224   /// Generate all the base pointers, section pointers, sizes and map
8225   /// types for the extracted mappable expressions. Also, for each item that
8226   /// relates with a device pointer, a pair of the relevant declaration and
8227   /// index where it occurs is appended to the device pointers info array.
8228   void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
8229                        MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
8230                        MapFlagsArrayTy &Types) const {
8231     // We have to process the component lists that relate with the same
8232     // declaration in a single chunk so that we can generate the map flags
8233     // correctly. Therefore, we organize all lists in a map.
8234     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
8235 
8236     // Helper function to fill the information map for the different supported
8237     // clauses.
8238     auto &&InfoGen = [&Info](
8239         const ValueDecl *D,
8240         OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8241         OpenMPMapClauseKind MapType,
8242         ArrayRef<OpenMPMapModifierKind> MapModifiers,
8243         bool ReturnDevicePointer, bool IsImplicit) {
8244       const ValueDecl *VD =
8245           D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
8246       Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
8247                             IsImplicit);
8248     };
8249 
8250     assert(CurDir.is<const OMPExecutableDirective *>() &&
8251            "Expect a executable directive");
8252     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8253     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
8254       for (const auto L : C->component_lists()) {
8255         InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
8256             /*ReturnDevicePointer=*/false, C->isImplicit());
8257       }
8258     for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
8259       for (const auto L : C->component_lists()) {
8260         InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
8261             /*ReturnDevicePointer=*/false, C->isImplicit());
8262       }
8263     for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
8264       for (const auto L : C->component_lists()) {
8265         InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
8266             /*ReturnDevicePointer=*/false, C->isImplicit());
8267       }
8268 
8269     // Look at the use_device_ptr clause information and mark the existing map
8270     // entries as such. If there is no map information for an entry in the
8271     // use_device_ptr list, we create one with map type 'alloc' and zero size
8272     // section. It is the user fault if that was not mapped before. If there is
8273     // no map information and the pointer is a struct member, then we defer the
8274     // emission of that entry until the whole struct has been processed.
8275     llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
8276         DeferredInfo;
8277 
8278     for (const auto *C :
8279          CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
8280       for (const auto L : C->component_lists()) {
8281         assert(!L.second.empty() && "Not expecting empty list of components!");
8282         const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
8283         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8284         const Expr *IE = L.second.back().getAssociatedExpression();
8285         // If the first component is a member expression, we have to look into
8286         // 'this', which maps to null in the map of map information. Otherwise
8287         // look directly for the information.
8288         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8289 
8290         // We potentially have map information for this declaration already.
8291         // Look for the first set of components that refer to it.
8292         if (It != Info.end()) {
8293           auto CI = std::find_if(
8294               It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
8295                 return MI.Components.back().getAssociatedDeclaration() == VD;
8296               });
8297           // If we found a map entry, signal that the pointer has to be returned
8298           // and move on to the next declaration.
8299           if (CI != It->second.end()) {
8300             CI->ReturnDevicePointer = true;
8301             continue;
8302           }
8303         }
8304 
8305         // We didn't find any match in our map information - generate a zero
8306         // size array section - if the pointer is a struct member we defer this
8307         // action until the whole struct has been processed.
8308         if (isa<MemberExpr>(IE)) {
8309           // Insert the pointer into Info to be processed by
8310           // generateInfoForComponentList. Because it is a member pointer
8311           // without a pointee, no entry will be generated for it, therefore
8312           // we need to generate one after the whole struct has been processed.
8313           // Nonetheless, generateInfoForComponentList must be called to take
8314           // the pointer into account for the calculation of the range of the
8315           // partial struct.
8316           InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
8317                   /*ReturnDevicePointer=*/false, C->isImplicit());
8318           DeferredInfo[nullptr].emplace_back(IE, VD);
8319         } else {
8320           llvm::Value *Ptr =
8321               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8322           BasePointers.emplace_back(Ptr, VD);
8323           Pointers.push_back(Ptr);
8324           Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8325           Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
8326         }
8327       }
8328     }
8329 
8330     for (const auto &M : Info) {
8331       // We need to know when we generate information for the first component
8332       // associated with a capture, because the mapping flags depend on it.
8333       bool IsFirstComponentList = true;
8334 
8335       // Temporary versions of arrays
8336       MapBaseValuesArrayTy CurBasePointers;
8337       MapValuesArrayTy CurPointers;
8338       MapValuesArrayTy CurSizes;
8339       MapFlagsArrayTy CurTypes;
8340       StructRangeInfoTy PartialStruct;
8341 
8342       for (const MapInfo &L : M.second) {
8343         assert(!L.Components.empty() &&
8344                "Not expecting declaration with no component lists.");
8345 
8346         // Remember the current base pointer index.
8347         unsigned CurrentBasePointersIdx = CurBasePointers.size();
8348         generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
8349                                      CurBasePointers, CurPointers, CurSizes,
8350                                      CurTypes, PartialStruct,
8351                                      IsFirstComponentList, L.IsImplicit);
8352 
8353         // If this entry relates with a device pointer, set the relevant
8354         // declaration and add the 'return pointer' flag.
8355         if (L.ReturnDevicePointer) {
8356           assert(CurBasePointers.size() > CurrentBasePointersIdx &&
8357                  "Unexpected number of mapped base pointers.");
8358 
8359           const ValueDecl *RelevantVD =
8360               L.Components.back().getAssociatedDeclaration();
8361           assert(RelevantVD &&
8362                  "No relevant declaration related with device pointer??");
8363 
8364           CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
8365           CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8366         }
8367         IsFirstComponentList = false;
8368       }
8369 
8370       // Append any pending zero-length pointers which are struct members and
8371       // used with use_device_ptr.
8372       auto CI = DeferredInfo.find(M.first);
8373       if (CI != DeferredInfo.end()) {
8374         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8375           llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8376           llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
8377               this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
8378           CurBasePointers.emplace_back(BasePtr, L.VD);
8379           CurPointers.push_back(Ptr);
8380           CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
8381           // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
8382           // value MEMBER_OF=FFFF so that the entry is later updated with the
8383           // correct value of MEMBER_OF.
8384           CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8385                              OMP_MAP_MEMBER_OF);
8386         }
8387       }
8388 
8389       // If there is an entry in PartialStruct it means we have a struct with
8390       // individual members mapped. Emit an extra combined entry.
8391       if (PartialStruct.Base.isValid())
8392         emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8393                           PartialStruct);
8394 
8395       // We need to append the results of this capture to what we already have.
8396       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8397       Pointers.append(CurPointers.begin(), CurPointers.end());
8398       Sizes.append(CurSizes.begin(), CurSizes.end());
8399       Types.append(CurTypes.begin(), CurTypes.end());
8400     }
8401   }
8402 
8403   /// Generate all the base pointers, section pointers, sizes and map types for
8404   /// the extracted map clauses of user-defined mapper.
8405   void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers,
8406                                 MapValuesArrayTy &Pointers,
8407                                 MapValuesArrayTy &Sizes,
8408                                 MapFlagsArrayTy &Types) const {
8409     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8410            "Expect a declare mapper directive");
8411     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8412     // We have to process the component lists that relate with the same
8413     // declaration in a single chunk so that we can generate the map flags
8414     // correctly. Therefore, we organize all lists in a map.
8415     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
8416 
8417     // Helper function to fill the information map for the different supported
8418     // clauses.
8419     auto &&InfoGen = [&Info](
8420         const ValueDecl *D,
8421         OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8422         OpenMPMapClauseKind MapType,
8423         ArrayRef<OpenMPMapModifierKind> MapModifiers,
8424         bool ReturnDevicePointer, bool IsImplicit) {
8425       const ValueDecl *VD =
8426           D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
8427       Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
8428                             IsImplicit);
8429     };
8430 
8431     for (const auto *C : CurMapperDir->clauselists()) {
8432       const auto *MC = cast<OMPMapClause>(C);
8433       for (const auto L : MC->component_lists()) {
8434         InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(),
8435                 /*ReturnDevicePointer=*/false, MC->isImplicit());
8436       }
8437     }
8438 
8439     for (const auto &M : Info) {
8440       // We need to know when we generate information for the first component
8441       // associated with a capture, because the mapping flags depend on it.
8442       bool IsFirstComponentList = true;
8443 
8444       // Temporary versions of arrays
8445       MapBaseValuesArrayTy CurBasePointers;
8446       MapValuesArrayTy CurPointers;
8447       MapValuesArrayTy CurSizes;
8448       MapFlagsArrayTy CurTypes;
8449       StructRangeInfoTy PartialStruct;
8450 
8451       for (const MapInfo &L : M.second) {
8452         assert(!L.Components.empty() &&
8453                "Not expecting declaration with no component lists.");
8454         generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
8455                                      CurBasePointers, CurPointers, CurSizes,
8456                                      CurTypes, PartialStruct,
8457                                      IsFirstComponentList, L.IsImplicit);
8458         IsFirstComponentList = false;
8459       }
8460 
8461       // If there is an entry in PartialStruct it means we have a struct with
8462       // individual members mapped. Emit an extra combined entry.
8463       if (PartialStruct.Base.isValid())
8464         emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8465                           PartialStruct);
8466 
8467       // We need to append the results of this capture to what we already have.
8468       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8469       Pointers.append(CurPointers.begin(), CurPointers.end());
8470       Sizes.append(CurSizes.begin(), CurSizes.end());
8471       Types.append(CurTypes.begin(), CurTypes.end());
8472     }
8473   }
8474 
8475   /// Emit capture info for lambdas for variables captured by reference.
8476   void generateInfoForLambdaCaptures(
8477       const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
8478       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
8479       MapFlagsArrayTy &Types,
8480       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8481     const auto *RD = VD->getType()
8482                          .getCanonicalType()
8483                          .getNonReferenceType()
8484                          ->getAsCXXRecordDecl();
8485     if (!RD || !RD->isLambda())
8486       return;
8487     Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
8488     LValue VDLVal = CGF.MakeAddrLValue(
8489         VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
8490     llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
8491     FieldDecl *ThisCapture = nullptr;
8492     RD->getCaptureFields(Captures, ThisCapture);
8493     if (ThisCapture) {
8494       LValue ThisLVal =
8495           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8496       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8497       LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8498                                  VDLVal.getPointer(CGF));
8499       BasePointers.push_back(ThisLVal.getPointer(CGF));
8500       Pointers.push_back(ThisLValVal.getPointer(CGF));
8501       Sizes.push_back(
8502           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8503                                     CGF.Int64Ty, /*isSigned=*/true));
8504       Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8505                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8506     }
8507     for (const LambdaCapture &LC : RD->captures()) {
8508       if (!LC.capturesVariable())
8509         continue;
8510       const VarDecl *VD = LC.getCapturedVar();
8511       if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8512         continue;
8513       auto It = Captures.find(VD);
8514       assert(It != Captures.end() && "Found lambda capture without field.");
8515       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8516       if (LC.getCaptureKind() == LCK_ByRef) {
8517         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8518         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8519                                    VDLVal.getPointer(CGF));
8520         BasePointers.push_back(VarLVal.getPointer(CGF));
8521         Pointers.push_back(VarLValVal.getPointer(CGF));
8522         Sizes.push_back(CGF.Builder.CreateIntCast(
8523             CGF.getTypeSize(
8524                 VD->getType().getCanonicalType().getNonReferenceType()),
8525             CGF.Int64Ty, /*isSigned=*/true));
8526       } else {
8527         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8528         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8529                                    VDLVal.getPointer(CGF));
8530         BasePointers.push_back(VarLVal.getPointer(CGF));
8531         Pointers.push_back(VarRVal.getScalarVal());
8532         Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8533       }
8534       Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8535                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8536     }
8537   }
8538 
8539   /// Set correct indices for lambdas captures.
8540   void adjustMemberOfForLambdaCaptures(
8541       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8542       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8543       MapFlagsArrayTy &Types) const {
8544     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8545       // Set correct member_of idx for all implicit lambda captures.
8546       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8547                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8548         continue;
8549       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8550       assert(BasePtr && "Unable to find base lambda address.");
8551       int TgtIdx = -1;
8552       for (unsigned J = I; J > 0; --J) {
8553         unsigned Idx = J - 1;
8554         if (Pointers[Idx] != BasePtr)
8555           continue;
8556         TgtIdx = Idx;
8557         break;
8558       }
8559       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8560       // All other current entries will be MEMBER_OF the combined entry
8561       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8562       // 0xFFFF in the MEMBER_OF field).
8563       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8564       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8565     }
8566   }
8567 
  /// Generate the base pointers, section pointers, sizes and map types
  /// associated to a given capture.
  ///
  /// Entries are appended to \p BasePointers, \p Pointers, \p Sizes and
  /// \p Types. If the captured declaration is present in DevPointersMap, a
  /// single literal target-parameter entry holding \p Arg is emitted and
  /// nothing else is done. Otherwise every component list that the map
  /// clauses of the current directive associate with the declaration is
  /// passed to generateInfoForComponentList: first the lists that are the
  /// base of an overlap (together with the lists overlapping them), then
  /// every list that is not itself the base of an overlap.
  ///
  /// \param Cap Capture to process. Must not capture a variable array type.
  /// \param Arg Value used as base pointer and pointer in the DevPointersMap
  /// case; it is not used otherwise.
  /// \param PartialStruct Forwarded to every generateInfoForComponentList
  /// call made by this function.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg,
                              MapBaseValuesArrayTy &BasePointers,
                              MapValuesArrayTy &Pointers,
                              MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we are generating information for the first
    // component. The declaration is null when the capture is 'this'.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in an is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      BasePointers.emplace_back(Arg, VD);
      Pointers.push_back(Arg);
      // The size recorded is the size of a void pointer, as a signed i64.
      Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
      return;
    }

    // One mapped component list plus the properties of the clause it came
    // from: (components, map type, map type modifiers, is implicit).
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    // Collect the component lists every map clause of the current directive
    // provides for VD.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      for (const auto L : C->decl_component_lists(VD)) {
        assert(L.first == VD &&
               "We got information for the wrong declaration??");
        assert(!L.second.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(L.second, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit());
      }
    }

    // Find overlapping elements (including the offset from the base element).
    // The map is keyed by the address of an element of DeclComponentLists, so
    // that vector must not be modified from here on.
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    // Index of the first list after L; slicing from it compares every
    // unordered pair of lists exactly once.
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ++Count;
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        // Only Components1 is needed here; MapType, MapModifiers and
        // IsImplicit are overwritten but not read in this loop.
        std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
        // Walk both lists in reverse, in lockstep, until the components
        // differ or one of the lists is exhausted.
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head of
        // the components list.
        if (CI == CE || SI == SE) {
          assert((CI != CE || SI != SE) &&
                 "Unexpected full match of the mapping components.");
          // The list that was exhausted is the base of the overlap; the other
          // list is recorded as overlapping it.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    // Layout receives the fields of VD's record type and is used by the
    // comparator below for fields that have different parents.
    // NOTE(review): VD is null for a 'this' capture but is dereferenced here
    // whenever overlaps were found - confirm that overlapped lists cannot
    // occur for 'this'.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      if (const auto *CRD =
              VD->getType().getCanonicalType()->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            // Skip the common part of both lists, using the same walk as the
            // overlap detection above.
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // Both lists diverge at a field: order fields of the same parent
            // by field index, otherwise by which one appears first in Layout.
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            // NOTE(review): It is dereferenced without a check against
            // Layout.end(); presumably one of the two fields is always part
            // of Layout - confirm.
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      // Every base list is passed with a fresh 'true' first-component flag.
      bool IsFirstComponentList = true;
      generateInfoForComponentList(MapType, MapModifiers, Components,
                                   BasePointers, Pointers, Sizes, Types,
                                   PartialStruct, IsFirstComponentList,
                                   IsImplicit, OverlappedComponents);
    }
    // Go through other elements without overlapped elements.
    // The flag is true only on the first iteration, and only when no
    // overlapped lists were found above.
    bool IsFirstComponentList = OverlappedData.empty();
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      // Lists that are the base of an overlap were already handled above.
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, Components,
                                     BasePointers, Pointers, Sizes, Types,
                                     PartialStruct, IsFirstComponentList,
                                     IsImplicit);
      IsFirstComponentList = false;
    }
  }
8745 
8746   /// Generate the base pointers, section pointers, sizes and map types
8747   /// associated with the declare target link variables.
8748   void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
8749                                         MapValuesArrayTy &Pointers,
8750                                         MapValuesArrayTy &Sizes,
8751                                         MapFlagsArrayTy &Types) const {
8752     assert(CurDir.is<const OMPExecutableDirective *>() &&
8753            "Expect a executable directive");
8754     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8755     // Map other list items in the map clause which are not captured variables
8756     // but "declare target link" global variables.
8757     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8758       for (const auto L : C->component_lists()) {
8759         if (!L.first)
8760           continue;
8761         const auto *VD = dyn_cast<VarDecl>(L.first);
8762         if (!VD)
8763           continue;
8764         llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
8765             OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
8766         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8767             !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
8768           continue;
8769         StructRangeInfoTy PartialStruct;
8770         generateInfoForComponentList(
8771             C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
8772             Pointers, Sizes, Types, PartialStruct,
8773             /*IsFirstComponentList=*/true, C->isImplicit());
8774         assert(!PartialStruct.Base.isValid() &&
8775                "No partial structs for declare target link expected.");
8776       }
8777     }
8778   }
8779 
8780   /// Generate the default map information for a given capture \a CI,
8781   /// record field declaration \a RI and captured value \a CV.
8782   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
8783                               const FieldDecl &RI, llvm::Value *CV,
8784                               MapBaseValuesArrayTy &CurBasePointers,
8785                               MapValuesArrayTy &CurPointers,
8786                               MapValuesArrayTy &CurSizes,
8787                               MapFlagsArrayTy &CurMapTypes) const {
8788     bool IsImplicit = true;
8789     // Do the default mapping.
8790     if (CI.capturesThis()) {
8791       CurBasePointers.push_back(CV);
8792       CurPointers.push_back(CV);
8793       const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
8794       CurSizes.push_back(
8795           CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
8796                                     CGF.Int64Ty, /*isSigned=*/true));
8797       // Default map type.
8798       CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
8799     } else if (CI.capturesVariableByCopy()) {
8800       CurBasePointers.push_back(CV);
8801       CurPointers.push_back(CV);
8802       if (!RI.getType()->isAnyPointerType()) {
8803         // We have to signal to the runtime captures passed by value that are
8804         // not pointers.
8805         CurMapTypes.push_back(OMP_MAP_LITERAL);
8806         CurSizes.push_back(CGF.Builder.CreateIntCast(
8807             CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
8808       } else {
8809         // Pointers are implicitly mapped with a zero size and no flags
8810         // (other than first map that is added for all implicit maps).
8811         CurMapTypes.push_back(OMP_MAP_NONE);
8812         CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8813       }
8814       const VarDecl *VD = CI.getCapturedVar();
8815       auto I = FirstPrivateDecls.find(VD);
8816       if (I != FirstPrivateDecls.end())
8817         IsImplicit = I->getSecond();
8818     } else {
8819       assert(CI.capturesVariable() && "Expected captured reference.");
8820       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
8821       QualType ElementType = PtrTy->getPointeeType();
8822       CurSizes.push_back(CGF.Builder.CreateIntCast(
8823           CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
8824       // The default map type for a scalar/complex type is 'to' because by
8825       // default the value doesn't have to be retrieved. For an aggregate
8826       // type, the default is 'tofrom'.
8827       CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
8828       const VarDecl *VD = CI.getCapturedVar();
8829       auto I = FirstPrivateDecls.find(VD);
8830       if (I != FirstPrivateDecls.end() &&
8831           VD->getType().isConstant(CGF.getContext())) {
8832         llvm::Constant *Addr =
8833             CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
8834         // Copy the value of the original variable to the new global copy.
8835         CGF.Builder.CreateMemCpy(
8836             CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
8837             Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
8838             CurSizes.back(), /*IsVolatile=*/false);
8839         // Use new global variable as the base pointers.
8840         CurBasePointers.push_back(Addr);
8841         CurPointers.push_back(Addr);
8842       } else {
8843         CurBasePointers.push_back(CV);
8844         if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
8845           Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
8846               CV, ElementType, CGF.getContext().getDeclAlign(VD),
8847               AlignmentSource::Decl));
8848           CurPointers.push_back(PtrAddr.getPointer());
8849         } else {
8850           CurPointers.push_back(CV);
8851         }
8852       }
8853       if (I != FirstPrivateDecls.end())
8854         IsImplicit = I->getSecond();
8855     }
8856     // Every default map produces a single argument which is a target parameter.
8857     CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;
8858 
8859     // Add flag stating this is an implicit map.
8860     if (IsImplicit)
8861       CurMapTypes.back() |= OMP_MAP_IMPLICIT;
8862   }
8863 };
8864 } // anonymous namespace
8865 
8866 /// Emit the arrays used to pass the captures and map information to the
8867 /// offloading runtime library. If there is no map or capture information,
8868 /// return nullptr by reference.
8869 static void
8870 emitOffloadingArrays(CodeGenFunction &CGF,
8871                      MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
8872                      MappableExprsHandler::MapValuesArrayTy &Pointers,
8873                      MappableExprsHandler::MapValuesArrayTy &Sizes,
8874                      MappableExprsHandler::MapFlagsArrayTy &MapTypes,
8875                      CGOpenMPRuntime::TargetDataInfo &Info) {
8876   CodeGenModule &CGM = CGF.CGM;
8877   ASTContext &Ctx = CGF.getContext();
8878 
8879   // Reset the array information.
8880   Info.clearArrayInfo();
8881   Info.NumberOfPtrs = BasePointers.size();
8882 
8883   if (Info.NumberOfPtrs) {
8884     // Detect if we have any capture size requiring runtime evaluation of the
8885     // size so that a constant array could be eventually used.
8886     bool hasRuntimeEvaluationCaptureSize = false;
8887     for (llvm::Value *S : Sizes)
8888       if (!isa<llvm::Constant>(S)) {
8889         hasRuntimeEvaluationCaptureSize = true;
8890         break;
8891       }
8892 
8893     llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
8894     QualType PointerArrayType = Ctx.getConstantArrayType(
8895         Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
8896         /*IndexTypeQuals=*/0);
8897 
8898     Info.BasePointersArray =
8899         CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
8900     Info.PointersArray =
8901         CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
8902 
8903     // If we don't have any VLA types or other types that require runtime
8904     // evaluation, we can use a constant array for the map sizes, otherwise we
8905     // need to fill up the arrays as we do for the pointers.
8906     QualType Int64Ty =
8907         Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
8908     if (hasRuntimeEvaluationCaptureSize) {
8909       QualType SizeArrayType = Ctx.getConstantArrayType(
8910           Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
8911           /*IndexTypeQuals=*/0);
8912       Info.SizesArray =
8913           CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
8914     } else {
8915       // We expect all the sizes to be constant, so we collect them to create
8916       // a constant array.
8917       SmallVector<llvm::Constant *, 16> ConstSizes;
8918       for (llvm::Value *S : Sizes)
8919         ConstSizes.push_back(cast<llvm::Constant>(S));
8920 
8921       auto *SizesArrayInit = llvm::ConstantArray::get(
8922           llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
8923       std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
8924       auto *SizesArrayGbl = new llvm::GlobalVariable(
8925           CGM.getModule(), SizesArrayInit->getType(),
8926           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8927           SizesArrayInit, Name);
8928       SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8929       Info.SizesArray = SizesArrayGbl;
8930     }
8931 
8932     // The map types are always constant so we don't need to generate code to
8933     // fill arrays. Instead, we create an array constant.
8934     SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
8935     llvm::copy(MapTypes, Mapping.begin());
8936     llvm::Constant *MapTypesArrayInit =
8937         llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
8938     std::string MaptypesName =
8939         CGM.getOpenMPRuntime().getName({"offload_maptypes"});
8940     auto *MapTypesArrayGbl = new llvm::GlobalVariable(
8941         CGM.getModule(), MapTypesArrayInit->getType(),
8942         /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8943         MapTypesArrayInit, MaptypesName);
8944     MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8945     Info.MapTypesArray = MapTypesArrayGbl;
8946 
8947     for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
8948       llvm::Value *BPVal = *BasePointers[I];
8949       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
8950           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8951           Info.BasePointersArray, 0, I);
8952       BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8953           BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
8954       Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8955       CGF.Builder.CreateStore(BPVal, BPAddr);
8956 
8957       if (Info.requiresDevicePointerInfo())
8958         if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
8959           Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
8960 
8961       llvm::Value *PVal = Pointers[I];
8962       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
8963           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8964           Info.PointersArray, 0, I);
8965       P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8966           P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
8967       Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8968       CGF.Builder.CreateStore(PVal, PAddr);
8969 
8970       if (hasRuntimeEvaluationCaptureSize) {
8971         llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
8972             llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8973             Info.SizesArray,
8974             /*Idx0=*/0,
8975             /*Idx1=*/I);
8976         Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
8977         CGF.Builder.CreateStore(
8978             CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true),
8979             SAddr);
8980       }
8981     }
8982   }
8983 }
8984 
8985 /// Emit the arguments to be passed to the runtime library based on the
8986 /// arrays of pointers, sizes and map types.
8987 static void emitOffloadingArraysArgument(
8988     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
8989     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
8990     llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
8991   CodeGenModule &CGM = CGF.CGM;
8992   if (Info.NumberOfPtrs) {
8993     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8994         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8995         Info.BasePointersArray,
8996         /*Idx0=*/0, /*Idx1=*/0);
8997     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8998         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8999         Info.PointersArray,
9000         /*Idx0=*/0,
9001         /*Idx1=*/0);
9002     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9003         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
9004         /*Idx0=*/0, /*Idx1=*/0);
9005     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9006         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9007         Info.MapTypesArray,
9008         /*Idx0=*/0,
9009         /*Idx1=*/0);
9010   } else {
9011     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9012     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9013     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9014     MapTypesArrayArg =
9015         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9016   }
9017 }
9018 
9019 /// Check for inner distribute directive.
9020 static const OMPExecutableDirective *
9021 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
9022   const auto *CS = D.getInnermostCapturedStmt();
9023   const auto *Body =
9024       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
9025   const Stmt *ChildStmt =
9026       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9027 
9028   if (const auto *NestedDir =
9029           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9030     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
9031     switch (D.getDirectiveKind()) {
9032     case OMPD_target:
9033       if (isOpenMPDistributeDirective(DKind))
9034         return NestedDir;
9035       if (DKind == OMPD_teams) {
9036         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
9037             /*IgnoreCaptured=*/true);
9038         if (!Body)
9039           return nullptr;
9040         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9041         if (const auto *NND =
9042                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9043           DKind = NND->getDirectiveKind();
9044           if (isOpenMPDistributeDirective(DKind))
9045             return NND;
9046         }
9047       }
9048       return nullptr;
9049     case OMPD_target_teams:
9050       if (isOpenMPDistributeDirective(DKind))
9051         return NestedDir;
9052       return nullptr;
9053     case OMPD_target_parallel:
9054     case OMPD_target_simd:
9055     case OMPD_target_parallel_for:
9056     case OMPD_target_parallel_for_simd:
9057       return nullptr;
9058     case OMPD_target_teams_distribute:
9059     case OMPD_target_teams_distribute_simd:
9060     case OMPD_target_teams_distribute_parallel_for:
9061     case OMPD_target_teams_distribute_parallel_for_simd:
9062     case OMPD_parallel:
9063     case OMPD_for:
9064     case OMPD_parallel_for:
9065     case OMPD_parallel_master:
9066     case OMPD_parallel_sections:
9067     case OMPD_for_simd:
9068     case OMPD_parallel_for_simd:
9069     case OMPD_cancel:
9070     case OMPD_cancellation_point:
9071     case OMPD_ordered:
9072     case OMPD_threadprivate:
9073     case OMPD_allocate:
9074     case OMPD_task:
9075     case OMPD_simd:
9076     case OMPD_sections:
9077     case OMPD_section:
9078     case OMPD_single:
9079     case OMPD_master:
9080     case OMPD_critical:
9081     case OMPD_taskyield:
9082     case OMPD_barrier:
9083     case OMPD_taskwait:
9084     case OMPD_taskgroup:
9085     case OMPD_atomic:
9086     case OMPD_flush:
9087     case OMPD_depobj:
9088     case OMPD_scan:
9089     case OMPD_teams:
9090     case OMPD_target_data:
9091     case OMPD_target_exit_data:
9092     case OMPD_target_enter_data:
9093     case OMPD_distribute:
9094     case OMPD_distribute_simd:
9095     case OMPD_distribute_parallel_for:
9096     case OMPD_distribute_parallel_for_simd:
9097     case OMPD_teams_distribute:
9098     case OMPD_teams_distribute_simd:
9099     case OMPD_teams_distribute_parallel_for:
9100     case OMPD_teams_distribute_parallel_for_simd:
9101     case OMPD_target_update:
9102     case OMPD_declare_simd:
9103     case OMPD_declare_variant:
9104     case OMPD_begin_declare_variant:
9105     case OMPD_end_declare_variant:
9106     case OMPD_declare_target:
9107     case OMPD_end_declare_target:
9108     case OMPD_declare_reduction:
9109     case OMPD_declare_mapper:
9110     case OMPD_taskloop:
9111     case OMPD_taskloop_simd:
9112     case OMPD_master_taskloop:
9113     case OMPD_master_taskloop_simd:
9114     case OMPD_parallel_master_taskloop:
9115     case OMPD_parallel_master_taskloop_simd:
9116     case OMPD_requires:
9117     case OMPD_unknown:
9118       llvm_unreachable("Unexpected directive.");
9119     }
9120   }
9121 
9122   return nullptr;
9123 }
9124 
9125 /// Emit the user-defined mapper function. The code generation follows the
9126 /// pattern in the example below.
9127 /// \code
9128 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9129 ///                                           void *base, void *begin,
9130 ///                                           int64_t size, int64_t type) {
9131 ///   // Allocate space for an array section first.
9132 ///   if (size > 1 && !maptype.IsDelete)
9133 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9134 ///                                 size*sizeof(Ty), clearToFrom(type));
9135 ///   // Map members.
9136 ///   for (unsigned i = 0; i < size; i++) {
9137 ///     // For each component specified by this mapper:
9138 ///     for (auto c : all_components) {
9139 ///       if (c.hasMapper())
9140 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9141 ///                       c.arg_type);
9142 ///       else
9143 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9144 ///                                     c.arg_begin, c.arg_size, c.arg_type);
9145 ///     }
9146 ///   }
9147 ///   // Delete the array section.
9148 ///   if (size > 1 && maptype.IsDelete)
9149 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9150 ///                                 size*sizeof(Ty), clearToFrom(type));
9151 /// }
9152 /// \endcode
9153 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9154                                             CodeGenFunction *CGF) {
9155   if (UDMMap.count(D) > 0)
9156     return;
9157   ASTContext &C = CGM.getContext();
9158   QualType Ty = D->getType();
9159   QualType PtrTy = C.getPointerType(Ty).withRestrict();
9160   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
9161   auto *MapperVarDecl =
9162       cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9163   SourceLocation Loc = D->getLocation();
9164   CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9165 
9166   // Prepare mapper function arguments and attributes.
9167   ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9168                               C.VoidPtrTy, ImplicitParamDecl::Other);
9169   ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9170                             ImplicitParamDecl::Other);
9171   ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9172                              C.VoidPtrTy, ImplicitParamDecl::Other);
9173   ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9174                             ImplicitParamDecl::Other);
9175   ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9176                             ImplicitParamDecl::Other);
9177   FunctionArgList Args;
9178   Args.push_back(&HandleArg);
9179   Args.push_back(&BaseArg);
9180   Args.push_back(&BeginArg);
9181   Args.push_back(&SizeArg);
9182   Args.push_back(&TypeArg);
9183   const CGFunctionInfo &FnInfo =
9184       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
9185   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
9186   SmallString<64> TyStr;
9187   llvm::raw_svector_ostream Out(TyStr);
9188   CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
9189   std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9190   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
9191                                     Name, &CGM.getModule());
9192   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
9193   Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
9194   // Start the mapper function code generation.
9195   CodeGenFunction MapperCGF(CGM);
9196   MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
9197   // Compute the starting and end addreses of array elements.
9198   llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
9199       MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
9200       C.getPointerType(Int64Ty), Loc);
9201   llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
9202       MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
9203       CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
9204   llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
9205   llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
9206       MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
9207       C.getPointerType(Int64Ty), Loc);
9208   // Prepare common arguments for array initiation and deletion.
9209   llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
9210       MapperCGF.GetAddrOfLocalVar(&HandleArg),
9211       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9212   llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
9213       MapperCGF.GetAddrOfLocalVar(&BaseArg),
9214       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9215   llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
9216       MapperCGF.GetAddrOfLocalVar(&BeginArg),
9217       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9218 
9219   // Emit array initiation if this is an array section and \p MapType indicates
9220   // that memory allocation is required.
9221   llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
9222   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9223                              ElementSize, HeadBB, /*IsInit=*/true);
9224 
9225   // Emit a for loop to iterate through SizeArg of elements and map all of them.
9226 
9227   // Emit the loop header block.
9228   MapperCGF.EmitBlock(HeadBB);
9229   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
9230   llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
9231   // Evaluate whether the initial condition is satisfied.
9232   llvm::Value *IsEmpty =
9233       MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
9234   MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9235   llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
9236 
9237   // Emit the loop body block.
9238   MapperCGF.EmitBlock(BodyBB);
9239   llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
9240       PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
9241   PtrPHI->addIncoming(PtrBegin, EntryBB);
9242   Address PtrCurrent =
9243       Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
9244                           .getAlignment()
9245                           .alignmentOfArrayElement(ElementSize));
9246   // Privatize the declared variable of mapper to be the current array element.
9247   CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9248   Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
9249     return MapperCGF
9250         .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
9251         .getAddress(MapperCGF);
9252   });
9253   (void)Scope.Privatize();
9254 
9255   // Get map clause information. Fill up the arrays with all mapped variables.
9256   MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
9257   MappableExprsHandler::MapValuesArrayTy Pointers;
9258   MappableExprsHandler::MapValuesArrayTy Sizes;
9259   MappableExprsHandler::MapFlagsArrayTy MapTypes;
9260   MappableExprsHandler MEHandler(*D, MapperCGF);
9261   MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes);
9262 
9263   // Call the runtime API __tgt_mapper_num_components to get the number of
9264   // pre-existing components.
9265   llvm::Value *OffloadingArgs[] = {Handle};
9266   llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
9267       createRuntimeFunction(OMPRTL__tgt_mapper_num_components), OffloadingArgs);
9268   llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
9269       PreviousSize,
9270       MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
9271 
9272   // Fill up the runtime mapper handle for all components.
9273   for (unsigned I = 0; I < BasePointers.size(); ++I) {
9274     llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
9275         *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9276     llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
9277         Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9278     llvm::Value *CurSizeArg = Sizes[I];
9279 
9280     // Extract the MEMBER_OF field from the map type.
9281     llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
9282     MapperCGF.EmitBlock(MemberBB);
9283     llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]);
9284     llvm::Value *Member = MapperCGF.Builder.CreateAnd(
9285         OriMapType,
9286         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
9287     llvm::BasicBlock *MemberCombineBB =
9288         MapperCGF.createBasicBlock("omp.member.combine");
9289     llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
9290     llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
9291     MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
9292     // Add the number of pre-existing components to the MEMBER_OF field if it
9293     // is valid.
9294     MapperCGF.EmitBlock(MemberCombineBB);
9295     llvm::Value *CombinedMember =
9296         MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9297     // Do nothing if it is not a member of previous components.
9298     MapperCGF.EmitBlock(TypeBB);
9299     llvm::PHINode *MemberMapType =
9300         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
9301     MemberMapType->addIncoming(OriMapType, MemberBB);
9302     MemberMapType->addIncoming(CombinedMember, MemberCombineBB);
9303 
9304     // Combine the map type inherited from user-defined mapper with that
9305     // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
9306     // bits of the \a MapType, which is the input argument of the mapper
9307     // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
9308     // bits of MemberMapType.
9309     // [OpenMP 5.0], 1.2.6. map-type decay.
9310     //        | alloc |  to   | from  | tofrom | release | delete
9311     // ----------------------------------------------------------
9312     // alloc  | alloc | alloc | alloc | alloc  | release | delete
9313     // to     | alloc |  to   | alloc |   to   | release | delete
9314     // from   | alloc | alloc | from  |  from  | release | delete
9315     // tofrom | alloc |  to   | from  | tofrom | release | delete
9316     llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
9317         MapType,
9318         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
9319                                    MappableExprsHandler::OMP_MAP_FROM));
9320     llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
9321     llvm::BasicBlock *AllocElseBB =
9322         MapperCGF.createBasicBlock("omp.type.alloc.else");
9323     llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
9324     llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
9325     llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
9326     llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
9327     llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
9328     MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9329     // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
9330     MapperCGF.EmitBlock(AllocBB);
9331     llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
9332         MemberMapType,
9333         MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9334                                      MappableExprsHandler::OMP_MAP_FROM)));
9335     MapperCGF.Builder.CreateBr(EndBB);
9336     MapperCGF.EmitBlock(AllocElseBB);
9337     llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
9338         LeftToFrom,
9339         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
9340     MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9341     // In case of to, clear OMP_MAP_FROM.
9342     MapperCGF.EmitBlock(ToBB);
9343     llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
9344         MemberMapType,
9345         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
9346     MapperCGF.Builder.CreateBr(EndBB);
9347     MapperCGF.EmitBlock(ToElseBB);
9348     llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
9349         LeftToFrom,
9350         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
9351     MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9352     // In case of from, clear OMP_MAP_TO.
9353     MapperCGF.EmitBlock(FromBB);
9354     llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
9355         MemberMapType,
9356         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
9357     // In case of tofrom, do nothing.
9358     MapperCGF.EmitBlock(EndBB);
9359     llvm::PHINode *CurMapType =
9360         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
9361     CurMapType->addIncoming(AllocMapType, AllocBB);
9362     CurMapType->addIncoming(ToMapType, ToBB);
9363     CurMapType->addIncoming(FromMapType, FromBB);
9364     CurMapType->addIncoming(MemberMapType, ToElseBB);
9365 
9366     // TODO: call the corresponding mapper function if a user-defined mapper is
9367     // associated with this map clause.
9368     // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9369     // data structure.
9370     llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
9371                                      CurSizeArg, CurMapType};
9372     MapperCGF.EmitRuntimeCall(
9373         createRuntimeFunction(OMPRTL__tgt_push_mapper_component),
9374         OffloadingArgs);
9375   }
9376 
9377   // Update the pointer to point to the next element that needs to be mapped,
9378   // and check whether we have mapped all elements.
9379   llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
9380       PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
9381   PtrPHI->addIncoming(PtrNext, BodyBB);
9382   llvm::Value *IsDone =
9383       MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9384   llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
9385   MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9386 
9387   MapperCGF.EmitBlock(ExitBB);
9388   // Emit array deletion if this is an array section and \p MapType indicates
9389   // that deletion is required.
9390   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9391                              ElementSize, DoneBB, /*IsInit=*/false);
9392 
9393   // Emit the function exit block.
9394   MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9395   MapperCGF.FinishFunction();
9396   UDMMap.try_emplace(D, Fn);
9397   if (CGF) {
9398     auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
9399     Decls.second.push_back(D);
9400   }
9401 }
9402 
9403 /// Emit the array initialization or deletion portion for user-defined mapper
9404 /// code generation. First, it evaluates whether an array section is mapped and
9405 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9406 /// true, and \a MapType indicates to not delete this array, array
9407 /// initialization code is generated. If \a IsInit is false, and \a MapType
9408 /// indicates to not this array, array deletion code is generated.
9409 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9410     CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9411     llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9412     CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
9413   StringRef Prefix = IsInit ? ".init" : ".del";
9414 
9415   // Evaluate if this is an array section.
9416   llvm::BasicBlock *IsDeleteBB =
9417       MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"}));
9418   llvm::BasicBlock *BodyBB =
9419       MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
9420   llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
9421       Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9422   MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);
9423 
9424   // Evaluate if we are going to delete this section.
9425   MapperCGF.EmitBlock(IsDeleteBB);
9426   llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9427       MapType,
9428       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
9429   llvm::Value *DeleteCond;
9430   if (IsInit) {
9431     DeleteCond = MapperCGF.Builder.CreateIsNull(
9432         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9433   } else {
9434     DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9435         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9436   }
9437   MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);
9438 
9439   MapperCGF.EmitBlock(BodyBB);
9440   // Get the array size by multiplying element size and element number (i.e., \p
9441   // Size).
9442   llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9443       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9444   // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
9445   // memory allocation/deletion purpose only.
9446   llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
9447       MapType,
9448       MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9449                                    MappableExprsHandler::OMP_MAP_FROM)));
9450   // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9451   // data structure.
9452   llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
9453   MapperCGF.EmitRuntimeCall(
9454       createRuntimeFunction(OMPRTL__tgt_push_mapper_component), OffloadingArgs);
9455 }
9456 
9457 void CGOpenMPRuntime::emitTargetNumIterationsCall(
9458     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9459     llvm::Value *DeviceID,
9460     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9461                                      const OMPLoopDirective &D)>
9462         SizeEmitter) {
9463   OpenMPDirectiveKind Kind = D.getDirectiveKind();
9464   const OMPExecutableDirective *TD = &D;
9465   // Get nested teams distribute kind directive, if any.
9466   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
9467     TD = getNestedDistributeDirective(CGM.getContext(), D);
9468   if (!TD)
9469     return;
9470   const auto *LD = cast<OMPLoopDirective>(TD);
9471   auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF,
9472                                                      PrePostActionTy &) {
9473     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
9474       llvm::Value *Args[] = {DeviceID, NumIterations};
9475       CGF.EmitRuntimeCall(
9476           createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
9477     }
9478   };
9479   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
9480 }
9481 
/// Emit the host-side code that launches the target region of directive \a D.
///
/// When \a OutlinedFnID is null, only the host version \a OutlinedFn is
/// called. Otherwise the offloading arrays are built from the captured
/// variables and map clauses, the runtime offloading entry point is called,
/// and the host version is called if that runtime call returns a non-zero
/// value. When \a IfCond is present (and \a OutlinedFnID is non-null),
/// emitIfClause selects between the offloading path and the host-only path.
/// If \a D has depend clauses, both paths are emitted through
/// EmitOMPTargetTaskBasedDirective instead of emitInlinedDirective.
///
/// \param CGF Function in which the code is emitted; nothing is emitted if it
/// has no insert point.
/// \param D Target directive being emitted.
/// \param OutlinedFn Host version of the target region; must be non-null.
/// \param OutlinedFnID ID passed to the runtime to identify the target region,
/// or null if no offloading is to be emitted.
/// \param IfCond Expression of the if clause, or null.
/// \param Device Device expression (may be null) together with its modifier.
/// With the 'ancestor' modifier the host version is called directly.
/// \param SizeEmitter Callback forwarded to emitTargetNumIterationsCall to
/// compute the loop trip count.
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // With depend clauses the region is emitted through
  // EmitOMPTargetTaskBasedDirective (see TargetThenGen/TargetElseGen below) and
  // the captured variables are regenerated right before each host call.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  // Collect the values of the captured variables up front; they are used both
  // for the map information and as arguments of the host call.
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // InputInfo and MapTypesArray are filled in by TargetThenGen and read by
  // ThenGen, which captures both by reference.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      if (RequiresOuterTask) {
        // NOTE(review): presumably the previously collected values are not
        // usable inside the task-based region, hence the regeneration here.
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      // The device ID is passed to the runtime as a signed 64-bit integer.
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No device clause expression: pass OMP_DEVICEID_UNDEF.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       NumTeams,
                                       NumThreads};
      // The nowait clause selects the *_nowait variant of the entry point.
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
                                          : OMPRTL__tgt_target_teams),
          OffloadingArgs);
    } else {
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray};
      // The nowait clause selects the *_nowait variant of the entry point.
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
                                          : OMPRTL__tgt_target),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    // A non-zero return value of the runtime call is treated as failure.
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    // Failure path: call the host version of the region.
    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Offloading path: gather the map information for every capture, emit the
  // offloading arrays, publish them through InputInfo/MapTypesArray and then
  // run ThenGen.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;

    // Walk the captures, the fields of the captured record and the captured
    // values in lockstep.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
      MappableExprsHandler::MapValuesArrayTy CurPointers;
      MappableExprsHandler::MapValuesArrayTy CurSizes;
      MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurBasePointers.push_back(*CV);
        CurPointers.push_back(*CV);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                              MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                              MappableExprsHandler::OMP_MAP_IMPLICIT);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
                                         CurSizes, CurMapTypes, PartialStruct);
        if (CurBasePointers.empty())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
                                           CurPointers, CurSizes, CurMapTypes);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(
              CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
              CurMapTypes, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert(!CurBasePointers.empty() &&
             "Non-existing map pointer for capture!");
      assert(CurBasePointers.size() == CurPointers.size() &&
             CurBasePointers.size() == CurSizes.size() &&
             CurBasePointers.size() == CurMapTypes.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
                                    CurMapTypes, PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
                                              Pointers, MapTypes);
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
                                               MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Hand the emitted arrays over to ThenGen via the by-reference captures.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // Host-only path: run ElseGen, task-based if depend clauses are present. A
  // default-constructed OMPTargetDataInfo is passed in that case.
  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user does not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
9773 
9774 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
9775                                                     StringRef ParentName) {
9776   if (!S)
9777     return;
9778 
9779   // Codegen OMP target directives that offload compute to the device.
9780   bool RequiresDeviceCodegen =
9781       isa<OMPExecutableDirective>(S) &&
9782       isOpenMPTargetExecutionDirective(
9783           cast<OMPExecutableDirective>(S)->getDirectiveKind());
9784 
9785   if (RequiresDeviceCodegen) {
9786     const auto &E = *cast<OMPExecutableDirective>(S);
9787     unsigned DeviceID;
9788     unsigned FileID;
9789     unsigned Line;
9790     getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
9791                              FileID, Line);
9792 
9793     // Is this a target region that should not be emitted as an entry point? If
9794     // so just signal we are done with this target region.
9795     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
9796                                                             ParentName, Line))
9797       return;
9798 
9799     switch (E.getDirectiveKind()) {
9800     case OMPD_target:
9801       CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
9802                                                    cast<OMPTargetDirective>(E));
9803       break;
9804     case OMPD_target_parallel:
9805       CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
9806           CGM, ParentName, cast<OMPTargetParallelDirective>(E));
9807       break;
9808     case OMPD_target_teams:
9809       CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
9810           CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
9811       break;
9812     case OMPD_target_teams_distribute:
9813       CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
9814           CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
9815       break;
9816     case OMPD_target_teams_distribute_simd:
9817       CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
9818           CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
9819       break;
9820     case OMPD_target_parallel_for:
9821       CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
9822           CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
9823       break;
9824     case OMPD_target_parallel_for_simd:
9825       CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
9826           CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
9827       break;
9828     case OMPD_target_simd:
9829       CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
9830           CGM, ParentName, cast<OMPTargetSimdDirective>(E));
9831       break;
9832     case OMPD_target_teams_distribute_parallel_for:
9833       CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
9834           CGM, ParentName,
9835           cast<OMPTargetTeamsDistributeParallelForDirective>(E));
9836       break;
9837     case OMPD_target_teams_distribute_parallel_for_simd:
9838       CodeGenFunction::
9839           EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
9840               CGM, ParentName,
9841               cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
9842       break;
9843     case OMPD_parallel:
9844     case OMPD_for:
9845     case OMPD_parallel_for:
9846     case OMPD_parallel_master:
9847     case OMPD_parallel_sections:
9848     case OMPD_for_simd:
9849     case OMPD_parallel_for_simd:
9850     case OMPD_cancel:
9851     case OMPD_cancellation_point:
9852     case OMPD_ordered:
9853     case OMPD_threadprivate:
9854     case OMPD_allocate:
9855     case OMPD_task:
9856     case OMPD_simd:
9857     case OMPD_sections:
9858     case OMPD_section:
9859     case OMPD_single:
9860     case OMPD_master:
9861     case OMPD_critical:
9862     case OMPD_taskyield:
9863     case OMPD_barrier:
9864     case OMPD_taskwait:
9865     case OMPD_taskgroup:
9866     case OMPD_atomic:
9867     case OMPD_flush:
9868     case OMPD_depobj:
9869     case OMPD_scan:
9870     case OMPD_teams:
9871     case OMPD_target_data:
9872     case OMPD_target_exit_data:
9873     case OMPD_target_enter_data:
9874     case OMPD_distribute:
9875     case OMPD_distribute_simd:
9876     case OMPD_distribute_parallel_for:
9877     case OMPD_distribute_parallel_for_simd:
9878     case OMPD_teams_distribute:
9879     case OMPD_teams_distribute_simd:
9880     case OMPD_teams_distribute_parallel_for:
9881     case OMPD_teams_distribute_parallel_for_simd:
9882     case OMPD_target_update:
9883     case OMPD_declare_simd:
9884     case OMPD_declare_variant:
9885     case OMPD_begin_declare_variant:
9886     case OMPD_end_declare_variant:
9887     case OMPD_declare_target:
9888     case OMPD_end_declare_target:
9889     case OMPD_declare_reduction:
9890     case OMPD_declare_mapper:
9891     case OMPD_taskloop:
9892     case OMPD_taskloop_simd:
9893     case OMPD_master_taskloop:
9894     case OMPD_master_taskloop_simd:
9895     case OMPD_parallel_master_taskloop:
9896     case OMPD_parallel_master_taskloop_simd:
9897     case OMPD_requires:
9898     case OMPD_unknown:
9899       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
9900     }
9901     return;
9902   }
9903 
9904   if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
9905     if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
9906       return;
9907 
9908     scanForTargetRegionsFunctions(
9909         E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
9910     return;
9911   }
9912 
9913   // If this is a lambda function, look into its body.
9914   if (const auto *L = dyn_cast<LambdaExpr>(S))
9915     S = L->getBody();
9916 
9917   // Keep looking for target regions recursively.
9918   for (const Stmt *II : S->children())
9919     scanForTargetRegionsFunctions(II, ParentName);
9920 }
9921 
9922 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9923   // If emitting code for the host, we do not process FD here. Instead we do
9924   // the normal code generation.
9925   if (!CGM.getLangOpts().OpenMPIsDevice) {
9926     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
9927       Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9928           OMPDeclareTargetDeclAttr::getDeviceType(FD);
9929       // Do not emit device_type(nohost) functions for the host.
9930       if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9931         return true;
9932     }
9933     return false;
9934   }
9935 
9936   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9937   // Try to detect target regions in the function.
9938   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
9939     StringRef Name = CGM.getMangledName(GD);
9940     scanForTargetRegionsFunctions(FD->getBody(), Name);
9941     Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9942         OMPDeclareTargetDeclAttr::getDeviceType(FD);
9943     // Do not emit device_type(nohost) functions for the host.
9944     if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9945       return true;
9946   }
9947 
9948   // Do not to emit function if it is not marked as declare target.
9949   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9950          AlreadyEmittedTargetDecls.count(VD) == 0;
9951 }
9952 
9953 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9954   if (!CGM.getLangOpts().OpenMPIsDevice)
9955     return false;
9956 
9957   // Check if there are Ctors/Dtors in this declaration and look for target
9958   // regions in it. We use the complete variant to produce the kernel name
9959   // mangling.
9960   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9961   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9962     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9963       StringRef ParentName =
9964           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9965       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9966     }
9967     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9968       StringRef ParentName =
9969           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9970       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9971     }
9972   }
9973 
9974   // Do not to emit variable if it is not marked as declare target.
9975   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9976       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9977           cast<VarDecl>(GD.getDecl()));
9978   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9979       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9980        HasRequiresUnifiedSharedMemory)) {
9981     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9982     return true;
9983   }
9984   return false;
9985 }
9986 
9987 llvm::Constant *
9988 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
9989                                                 const VarDecl *VD) {
9990   assert(VD->getType().isConstant(CGM.getContext()) &&
9991          "Expected constant variable.");
9992   StringRef VarName;
9993   llvm::Constant *Addr;
9994   llvm::GlobalValue::LinkageTypes Linkage;
9995   QualType Ty = VD->getType();
9996   SmallString<128> Buffer;
9997   {
9998     unsigned DeviceID;
9999     unsigned FileID;
10000     unsigned Line;
10001     getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
10002                              FileID, Line);
10003     llvm::raw_svector_ostream OS(Buffer);
10004     OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
10005        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
10006     VarName = OS.str();
10007   }
10008   Linkage = llvm::GlobalValue::InternalLinkage;
10009   Addr =
10010       getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
10011                                   getDefaultFirstprivateAddressSpace());
10012   cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
10013   CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
10014   CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
10015   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10016       VarName, Addr, VarSize,
10017       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
10018   return Addr;
10019 }
10020 
10021 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10022                                                    llvm::Constant *Addr) {
10023   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10024       !CGM.getLangOpts().OpenMPIsDevice)
10025     return;
10026   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10027       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10028   if (!Res) {
10029     if (CGM.getLangOpts().OpenMPIsDevice) {
10030       // Register non-target variables being emitted in device code (debug info
10031       // may cause this).
10032       StringRef VarName = CGM.getMangledName(VD);
10033       EmittedNonTargetVariables.try_emplace(VarName, Addr);
10034     }
10035     return;
10036   }
10037   // Register declare target variables.
10038   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
10039   StringRef VarName;
10040   CharUnits VarSize;
10041   llvm::GlobalValue::LinkageTypes Linkage;
10042 
10043   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10044       !HasRequiresUnifiedSharedMemory) {
10045     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10046     VarName = CGM.getMangledName(VD);
10047     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
10048       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
10049       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
10050     } else {
10051       VarSize = CharUnits::Zero();
10052     }
10053     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
10054     // Temp solution to prevent optimizations of the internal variables.
10055     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
10056       std::string RefName = getName({VarName, "ref"});
10057       if (!CGM.GetGlobalValue(RefName)) {
10058         llvm::Constant *AddrRef =
10059             getOrCreateInternalVariable(Addr->getType(), RefName);
10060         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
10061         GVAddrRef->setConstant(/*Val=*/true);
10062         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
10063         GVAddrRef->setInitializer(Addr);
10064         CGM.addCompilerUsedGlobal(GVAddrRef);
10065       }
10066     }
10067   } else {
10068     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
10069             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10070              HasRequiresUnifiedSharedMemory)) &&
10071            "Declare target attribute must link or to with unified memory.");
10072     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
10073       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
10074     else
10075       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10076 
10077     if (CGM.getLangOpts().OpenMPIsDevice) {
10078       VarName = Addr->getName();
10079       Addr = nullptr;
10080     } else {
10081       VarName = getAddrOfDeclareTargetVar(VD).getName();
10082       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
10083     }
10084     VarSize = CGM.getPointerSize();
10085     Linkage = llvm::GlobalValue::WeakAnyLinkage;
10086   }
10087 
10088   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10089       VarName, Addr, VarSize, Flags, Linkage);
10090 }
10091 
10092 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10093   if (isa<FunctionDecl>(GD.getDecl()) ||
10094       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10095     return emitTargetFunctions(GD);
10096 
10097   return emitTargetGlobalVariable(GD);
10098 }
10099 
10100 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10101   for (const VarDecl *VD : DeferredGlobalVariables) {
10102     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10103         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10104     if (!Res)
10105       continue;
10106     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10107         !HasRequiresUnifiedSharedMemory) {
10108       CGM.EmitGlobal(VD);
10109     } else {
10110       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10111               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10112                HasRequiresUnifiedSharedMemory)) &&
10113              "Expected link clause or to clause with unified memory.");
10114       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10115     }
10116   }
10117 }
10118 
10119 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10120     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10121   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10122          " Expected target-based directive.");
10123 }
10124 
10125 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10126   for (const OMPClause *Clause : D->clauselists()) {
10127     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10128       HasRequiresUnifiedSharedMemory = true;
10129     } else if (const auto *AC =
10130                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10131       switch (AC->getAtomicDefaultMemOrderKind()) {
10132       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10133         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10134         break;
10135       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10136         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10137         break;
10138       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10139         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10140         break;
10141       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10142         break;
10143       }
10144     }
10145   }
10146 }
10147 
10148 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
10149   return RequiresAtomicOrdering;
10150 }
10151 
10152 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10153                                                        LangAS &AS) {
10154   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10155     return false;
10156   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10157   switch(A->getAllocatorType()) {
10158   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10159   // Not supported, fallback to the default mem space.
10160   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10161   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10162   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10163   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10164   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10165   case OMPAllocateDeclAttr::OMPConstMemAlloc:
10166   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10167     AS = LangAS::Default;
10168     return true;
10169   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10170     llvm_unreachable("Expected predefined allocator for the variables with the "
10171                      "static storage.");
10172   }
10173   return false;
10174 }
10175 
10176 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
10177   return HasRequiresUnifiedSharedMemory;
10178 }
10179 
10180 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10181     CodeGenModule &CGM)
10182     : CGM(CGM) {
10183   if (CGM.getLangOpts().OpenMPIsDevice) {
10184     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10185     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10186   }
10187 }
10188 
10189 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10190   if (CGM.getLangOpts().OpenMPIsDevice)
10191     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10192 }
10193 
10194 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10195   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
10196     return true;
10197 
10198   const auto *D = cast<FunctionDecl>(GD.getDecl());
10199   // Do not to emit function if it is marked as declare target as it was already
10200   // emitted.
10201   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10202     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10203       if (auto *F = dyn_cast_or_null<llvm::Function>(
10204               CGM.GetGlobalValue(CGM.getMangledName(GD))))
10205         return !F->isDeclaration();
10206       return false;
10207     }
10208     return true;
10209   }
10210 
10211   return !AlreadyEmittedTargetDecls.insert(D).second;
10212 }
10213 
10214 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
10215   // If we don't have entries or if we are emitting code for the device, we
10216   // don't need to do anything.
10217   if (CGM.getLangOpts().OMPTargetTriples.empty() ||
10218       CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
10219       (OffloadEntriesInfoManager.empty() &&
10220        !HasEmittedDeclareTargetRegion &&
10221        !HasEmittedTargetRegion))
10222     return nullptr;
10223 
10224   // Create and register the function that handles the requires directives.
10225   ASTContext &C = CGM.getContext();
10226 
10227   llvm::Function *RequiresRegFn;
10228   {
10229     CodeGenFunction CGF(CGM);
10230     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
10231     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
10232     std::string ReqName = getName({"omp_offloading", "requires_reg"});
10233     RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI);
10234     CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
10235     OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
10236     // TODO: check for other requires clauses.
10237     // The requires directive takes effect only when a target region is
10238     // present in the compilation unit. Otherwise it is ignored and not
10239     // passed to the runtime. This avoids the runtime from throwing an error
10240     // for mismatching requires clauses across compilation units that don't
10241     // contain at least 1 target region.
10242     assert((HasEmittedTargetRegion ||
10243             HasEmittedDeclareTargetRegion ||
10244             !OffloadEntriesInfoManager.empty()) &&
10245            "Target or declare target region expected.");
10246     if (HasRequiresUnifiedSharedMemory)
10247       Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
10248     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires),
10249         llvm::ConstantInt::get(CGM.Int64Ty, Flags));
10250     CGF.FinishFunction();
10251   }
10252   return RequiresRegFn;
10253 }
10254 
10255 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10256                                     const OMPExecutableDirective &D,
10257                                     SourceLocation Loc,
10258                                     llvm::Function *OutlinedFn,
10259                                     ArrayRef<llvm::Value *> CapturedVars) {
10260   if (!CGF.HaveInsertPoint())
10261     return;
10262 
10263   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10264   CodeGenFunction::RunCleanupsScope Scope(CGF);
10265 
10266   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10267   llvm::Value *Args[] = {
10268       RTLoc,
10269       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10270       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10271   llvm::SmallVector<llvm::Value *, 16> RealArgs;
10272   RealArgs.append(std::begin(Args), std::end(Args));
10273   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10274 
10275   llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
10276   CGF.EmitRuntimeCall(RTLFn, RealArgs);
10277 }
10278 
10279 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10280                                          const Expr *NumTeams,
10281                                          const Expr *ThreadLimit,
10282                                          SourceLocation Loc) {
10283   if (!CGF.HaveInsertPoint())
10284     return;
10285 
10286   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10287 
10288   llvm::Value *NumTeamsVal =
10289       NumTeams
10290           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10291                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10292           : CGF.Builder.getInt32(0);
10293 
10294   llvm::Value *ThreadLimitVal =
10295       ThreadLimit
10296           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10297                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10298           : CGF.Builder.getInt32(0);
10299 
10300   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
10301   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10302                                      ThreadLimitVal};
10303   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
10304                       PushNumTeamsArgs);
10305 }
10306 
10307 void CGOpenMPRuntime::emitTargetDataCalls(
10308     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10309     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
10310   if (!CGF.HaveInsertPoint())
10311     return;
10312 
10313   // Action used to replace the default codegen action and turn privatization
10314   // off.
10315   PrePostActionTy NoPrivAction;
10316 
10317   // Generate the code for the opening of the data environment. Capture all the
10318   // arguments of the runtime call by reference because they are used in the
10319   // closing of the region.
10320   auto &&BeginThenGen = [this, &D, Device, &Info,
10321                          &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
10322     // Fill up the arrays with all the mapped variables.
10323     MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
10324     MappableExprsHandler::MapValuesArrayTy Pointers;
10325     MappableExprsHandler::MapValuesArrayTy Sizes;
10326     MappableExprsHandler::MapFlagsArrayTy MapTypes;
10327 
10328     // Get map clause information.
10329     MappableExprsHandler MCHandler(D, CGF);
10330     MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
10331 
10332     // Fill up the arrays and create the arguments.
10333     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
10334 
10335     llvm::Value *BasePointersArrayArg = nullptr;
10336     llvm::Value *PointersArrayArg = nullptr;
10337     llvm::Value *SizesArrayArg = nullptr;
10338     llvm::Value *MapTypesArrayArg = nullptr;
10339     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
10340                                  SizesArrayArg, MapTypesArrayArg, Info);
10341 
10342     // Emit device ID if any.
10343     llvm::Value *DeviceID = nullptr;
10344     if (Device) {
10345       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10346                                            CGF.Int64Ty, /*isSigned=*/true);
10347     } else {
10348       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10349     }
10350 
10351     // Emit the number of elements in the offloading arrays.
10352     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
10353 
10354     llvm::Value *OffloadingArgs[] = {
10355         DeviceID,         PointerNum,    BasePointersArrayArg,
10356         PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
10357     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin),
10358                         OffloadingArgs);
10359 
10360     // If device pointer privatization is required, emit the body of the region
10361     // here. It will have to be duplicated: with and without privatization.
10362     if (!Info.CaptureDeviceAddrMap.empty())
10363       CodeGen(CGF);
10364   };
10365 
10366   // Generate code for the closing of the data region.
10367   auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
10368                                             PrePostActionTy &) {
10369     assert(Info.isValid() && "Invalid data environment closing arguments.");
10370 
10371     llvm::Value *BasePointersArrayArg = nullptr;
10372     llvm::Value *PointersArrayArg = nullptr;
10373     llvm::Value *SizesArrayArg = nullptr;
10374     llvm::Value *MapTypesArrayArg = nullptr;
10375     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
10376                                  SizesArrayArg, MapTypesArrayArg, Info);
10377 
10378     // Emit device ID if any.
10379     llvm::Value *DeviceID = nullptr;
10380     if (Device) {
10381       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10382                                            CGF.Int64Ty, /*isSigned=*/true);
10383     } else {
10384       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10385     }
10386 
10387     // Emit the number of elements in the offloading arrays.
10388     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
10389 
10390     llvm::Value *OffloadingArgs[] = {
10391         DeviceID,         PointerNum,    BasePointersArrayArg,
10392         PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
10393     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end),
10394                         OffloadingArgs);
10395   };
10396 
10397   // If we need device pointer privatization, we need to emit the body of the
10398   // region with no privatization in the 'else' branch of the conditional.
10399   // Otherwise, we don't have to do anything.
10400   auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
10401                                                          PrePostActionTy &) {
10402     if (!Info.CaptureDeviceAddrMap.empty()) {
10403       CodeGen.setAction(NoPrivAction);
10404       CodeGen(CGF);
10405     }
10406   };
10407 
10408   // We don't have to do anything to close the region if the if clause evaluates
10409   // to false.
10410   auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
10411 
10412   if (IfCond) {
10413     emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
10414   } else {
10415     RegionCodeGenTy RCG(BeginThenGen);
10416     RCG(CGF);
10417   }
10418 
10419   // If we don't require privatization of device pointers, we emit the body in
10420   // between the runtime calls. This avoids duplicating the body code.
10421   if (Info.CaptureDeviceAddrMap.empty()) {
10422     CodeGen.setAction(NoPrivAction);
10423     CodeGen(CGF);
10424   }
10425 
10426   if (IfCond) {
10427     emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
10428   } else {
10429     RegionCodeGenTy RCG(EndThenGen);
10430     RCG(CGF);
10431   }
10432 }
10433 
10434 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
10435     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10436     const Expr *Device) {
10437   if (!CGF.HaveInsertPoint())
10438     return;
10439 
10440   assert((isa<OMPTargetEnterDataDirective>(D) ||
10441           isa<OMPTargetExitDataDirective>(D) ||
10442           isa<OMPTargetUpdateDirective>(D)) &&
10443          "Expecting either target enter, exit data, or update directives.");
10444 
10445   CodeGenFunction::OMPTargetDataInfo InputInfo;
10446   llvm::Value *MapTypesArray = nullptr;
10447   // Generate the code for the opening of the data environment.
10448   auto &&ThenGen = [this, &D, Device, &InputInfo,
10449                     &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
10450     // Emit device ID if any.
10451     llvm::Value *DeviceID = nullptr;
10452     if (Device) {
10453       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10454                                            CGF.Int64Ty, /*isSigned=*/true);
10455     } else {
10456       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10457     }
10458 
10459     // Emit the number of elements in the offloading arrays.
10460     llvm::Constant *PointerNum =
10461         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10462 
10463     llvm::Value *OffloadingArgs[] = {DeviceID,
10464                                      PointerNum,
10465                                      InputInfo.BasePointersArray.getPointer(),
10466                                      InputInfo.PointersArray.getPointer(),
10467                                      InputInfo.SizesArray.getPointer(),
10468                                      MapTypesArray};
10469 
10470     // Select the right runtime function call for each expected standalone
10471     // directive.
10472     const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10473     OpenMPRTLFunction RTLFn;
10474     switch (D.getDirectiveKind()) {
10475     case OMPD_target_enter_data:
10476       RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
10477                         : OMPRTL__tgt_target_data_begin;
10478       break;
10479     case OMPD_target_exit_data:
10480       RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
10481                         : OMPRTL__tgt_target_data_end;
10482       break;
10483     case OMPD_target_update:
10484       RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
10485                         : OMPRTL__tgt_target_data_update;
10486       break;
10487     case OMPD_parallel:
10488     case OMPD_for:
10489     case OMPD_parallel_for:
10490     case OMPD_parallel_master:
10491     case OMPD_parallel_sections:
10492     case OMPD_for_simd:
10493     case OMPD_parallel_for_simd:
10494     case OMPD_cancel:
10495     case OMPD_cancellation_point:
10496     case OMPD_ordered:
10497     case OMPD_threadprivate:
10498     case OMPD_allocate:
10499     case OMPD_task:
10500     case OMPD_simd:
10501     case OMPD_sections:
10502     case OMPD_section:
10503     case OMPD_single:
10504     case OMPD_master:
10505     case OMPD_critical:
10506     case OMPD_taskyield:
10507     case OMPD_barrier:
10508     case OMPD_taskwait:
10509     case OMPD_taskgroup:
10510     case OMPD_atomic:
10511     case OMPD_flush:
10512     case OMPD_depobj:
10513     case OMPD_scan:
10514     case OMPD_teams:
10515     case OMPD_target_data:
10516     case OMPD_distribute:
10517     case OMPD_distribute_simd:
10518     case OMPD_distribute_parallel_for:
10519     case OMPD_distribute_parallel_for_simd:
10520     case OMPD_teams_distribute:
10521     case OMPD_teams_distribute_simd:
10522     case OMPD_teams_distribute_parallel_for:
10523     case OMPD_teams_distribute_parallel_for_simd:
10524     case OMPD_declare_simd:
10525     case OMPD_declare_variant:
10526     case OMPD_begin_declare_variant:
10527     case OMPD_end_declare_variant:
10528     case OMPD_declare_target:
10529     case OMPD_end_declare_target:
10530     case OMPD_declare_reduction:
10531     case OMPD_declare_mapper:
10532     case OMPD_taskloop:
10533     case OMPD_taskloop_simd:
10534     case OMPD_master_taskloop:
10535     case OMPD_master_taskloop_simd:
10536     case OMPD_parallel_master_taskloop:
10537     case OMPD_parallel_master_taskloop_simd:
10538     case OMPD_target:
10539     case OMPD_target_simd:
10540     case OMPD_target_teams_distribute:
10541     case OMPD_target_teams_distribute_simd:
10542     case OMPD_target_teams_distribute_parallel_for:
10543     case OMPD_target_teams_distribute_parallel_for_simd:
10544     case OMPD_target_teams:
10545     case OMPD_target_parallel:
10546     case OMPD_target_parallel_for:
10547     case OMPD_target_parallel_for_simd:
10548     case OMPD_requires:
10549     case OMPD_unknown:
10550       llvm_unreachable("Unexpected standalone target data directive.");
10551       break;
10552     }
10553     CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
10554   };
10555 
10556   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
10557                              CodeGenFunction &CGF, PrePostActionTy &) {
10558     // Fill up the arrays with all the mapped variables.
10559     MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
10560     MappableExprsHandler::MapValuesArrayTy Pointers;
10561     MappableExprsHandler::MapValuesArrayTy Sizes;
10562     MappableExprsHandler::MapFlagsArrayTy MapTypes;
10563 
10564     // Get map clause information.
10565     MappableExprsHandler MEHandler(D, CGF);
10566     MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
10567 
10568     TargetDataInfo Info;
10569     // Fill up the arrays and create the arguments.
10570     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
10571     emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
10572                                  Info.PointersArray, Info.SizesArray,
10573                                  Info.MapTypesArray, Info);
10574     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10575     InputInfo.BasePointersArray =
10576         Address(Info.BasePointersArray, CGM.getPointerAlign());
10577     InputInfo.PointersArray =
10578         Address(Info.PointersArray, CGM.getPointerAlign());
10579     InputInfo.SizesArray =
10580         Address(Info.SizesArray, CGM.getPointerAlign());
10581     MapTypesArray = Info.MapTypesArray;
10582     if (D.hasClausesOfKind<OMPDependClause>())
10583       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10584     else
10585       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10586   };
10587 
10588   if (IfCond) {
10589     emitIfClause(CGF, IfCond, TargetThenGen,
10590                  [](CodeGenFunction &CGF, PrePostActionTy &) {});
10591   } else {
10592     RegionCodeGenTy ThenRCG(TargetThenGen);
10593     ThenRCG(CGF);
10594   }
10595 }
10596 
10597 namespace {
10598   /// Kind of parameter in a function with 'declare simd' directive.
10599   enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
10600   /// Attribute set of the parameter.
10601   struct ParamAttrTy {
10602     ParamKindTy Kind = Vector;
10603     llvm::APSInt StrideOrArg;
10604     llvm::APSInt Alignment;
10605   };
10606 } // namespace
10607 
10608 static unsigned evaluateCDTSize(const FunctionDecl *FD,
10609                                 ArrayRef<ParamAttrTy> ParamAttrs) {
10610   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10611   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10612   // of that clause. The VLEN value must be power of 2.
10613   // In other case the notion of the function`s "characteristic data type" (CDT)
10614   // is used to compute the vector length.
10615   // CDT is defined in the following order:
10616   //   a) For non-void function, the CDT is the return type.
10617   //   b) If the function has any non-uniform, non-linear parameters, then the
10618   //   CDT is the type of the first such parameter.
10619   //   c) If the CDT determined by a) or b) above is struct, union, or class
10620   //   type which is pass-by-value (except for the type that maps to the
10621   //   built-in complex data type), the characteristic data type is int.
10622   //   d) If none of the above three cases is applicable, the CDT is int.
10623   // The VLEN is then determined based on the CDT and the size of vector
10624   // register of that ISA for which current vector version is generated. The
10625   // VLEN is computed using the formula below:
10626   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
10627   // where vector register size specified in section 3.2.1 Registers and the
10628   // Stack Frame of original AMD64 ABI document.
10629   QualType RetType = FD->getReturnType();
10630   if (RetType.isNull())
10631     return 0;
10632   ASTContext &C = FD->getASTContext();
10633   QualType CDT;
10634   if (!RetType.isNull() && !RetType->isVoidType()) {
10635     CDT = RetType;
10636   } else {
10637     unsigned Offset = 0;
10638     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10639       if (ParamAttrs[Offset].Kind == Vector)
10640         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10641       ++Offset;
10642     }
10643     if (CDT.isNull()) {
10644       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10645         if (ParamAttrs[I + Offset].Kind == Vector) {
10646           CDT = FD->getParamDecl(I)->getType();
10647           break;
10648         }
10649       }
10650     }
10651   }
10652   if (CDT.isNull())
10653     CDT = C.IntTy;
10654   CDT = CDT->getCanonicalTypeUnqualified();
10655   if (CDT->isRecordType() || CDT->isUnionType())
10656     CDT = C.IntTy;
10657   return C.getTypeSize(CDT);
10658 }
10659 
10660 static void
10661 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10662                            const llvm::APSInt &VLENVal,
10663                            ArrayRef<ParamAttrTy> ParamAttrs,
10664                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
10665   struct ISADataTy {
10666     char ISA;
10667     unsigned VecRegSize;
10668   };
10669   ISADataTy ISAData[] = {
10670       {
10671           'b', 128
10672       }, // SSE
10673       {
10674           'c', 256
10675       }, // AVX
10676       {
10677           'd', 256
10678       }, // AVX2
10679       {
10680           'e', 512
10681       }, // AVX512
10682   };
10683   llvm::SmallVector<char, 2> Masked;
10684   switch (State) {
10685   case OMPDeclareSimdDeclAttr::BS_Undefined:
10686     Masked.push_back('N');
10687     Masked.push_back('M');
10688     break;
10689   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10690     Masked.push_back('N');
10691     break;
10692   case OMPDeclareSimdDeclAttr::BS_Inbranch:
10693     Masked.push_back('M');
10694     break;
10695   }
10696   for (char Mask : Masked) {
10697     for (const ISADataTy &Data : ISAData) {
10698       SmallString<256> Buffer;
10699       llvm::raw_svector_ostream Out(Buffer);
10700       Out << "_ZGV" << Data.ISA << Mask;
10701       if (!VLENVal) {
10702         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10703         assert(NumElts && "Non-zero simdlen/cdtsize expected");
10704         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10705       } else {
10706         Out << VLENVal;
10707       }
10708       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
10709         switch (ParamAttr.Kind){
10710         case LinearWithVarStride:
10711           Out << 's' << ParamAttr.StrideOrArg;
10712           break;
10713         case Linear:
10714           Out << 'l';
10715           if (!!ParamAttr.StrideOrArg)
10716             Out << ParamAttr.StrideOrArg;
10717           break;
10718         case Uniform:
10719           Out << 'u';
10720           break;
10721         case Vector:
10722           Out << 'v';
10723           break;
10724         }
10725         if (!!ParamAttr.Alignment)
10726           Out << 'a' << ParamAttr.Alignment;
10727       }
10728       Out << '_' << Fn->getName();
10729       Fn->addFnAttr(Out.str());
10730     }
10731   }
10732 }
10733 
// These are the functions that are needed to mangle the name of the
10735 // vector functions generated by the compiler, according to the rules
10736 // defined in the "Vector Function ABI specifications for AArch64",
10737 // available at
10738 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10739 
10740 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
10741 ///
10742 /// TODO: Need to implement the behavior for reference marked with a
10743 /// var or no linear modifiers (1.b in the section). For this, we
10744 /// need to extend ParamKindTy to support the linear modifiers.
10745 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10746   QT = QT.getCanonicalType();
10747 
10748   if (QT->isVoidType())
10749     return false;
10750 
10751   if (Kind == ParamKindTy::Uniform)
10752     return false;
10753 
10754   if (Kind == ParamKindTy::Linear)
10755     return false;
10756 
10757   // TODO: Handle linear references with modifiers
10758 
10759   if (Kind == ParamKindTy::LinearWithVarStride)
10760     return false;
10761 
10762   return true;
10763 }
10764 
10765 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10766 static bool getAArch64PBV(QualType QT, ASTContext &C) {
10767   QT = QT.getCanonicalType();
10768   unsigned Size = C.getTypeSize(QT);
10769 
10770   // Only scalars and complex within 16 bytes wide set PVB to true.
10771   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10772     return false;
10773 
10774   if (QT->isFloatingType())
10775     return true;
10776 
10777   if (QT->isIntegerType())
10778     return true;
10779 
10780   if (QT->isPointerType())
10781     return true;
10782 
10783   // TODO: Add support for complex types (section 3.1.2, item 2).
10784 
10785   return false;
10786 }
10787 
10788 /// Computes the lane size (LS) of a return type or of an input parameter,
10789 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10790 /// TODO: Add support for references, section 3.2.1, item 1.
10791 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10792   if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10793     QualType PTy = QT.getCanonicalType()->getPointeeType();
10794     if (getAArch64PBV(PTy, C))
10795       return C.getTypeSize(PTy);
10796   }
10797   if (getAArch64PBV(QT, C))
10798     return C.getTypeSize(QT);
10799 
10800   return C.getTypeSize(C.getUIntPtrType());
10801 }
10802 
10803 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10804 // signature of the scalar function, as defined in 3.2.2 of the
10805 // AAVFABI.
10806 static std::tuple<unsigned, unsigned, bool>
10807 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10808   QualType RetType = FD->getReturnType().getCanonicalType();
10809 
10810   ASTContext &C = FD->getASTContext();
10811 
10812   bool OutputBecomesInput = false;
10813 
10814   llvm::SmallVector<unsigned, 8> Sizes;
10815   if (!RetType->isVoidType()) {
10816     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10817     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10818       OutputBecomesInput = true;
10819   }
10820   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10821     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10822     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10823   }
10824 
10825   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10826   // The LS of a function parameter / return value can only be a power
10827   // of 2, starting from 8 bits, up to 128.
10828   assert(std::all_of(Sizes.begin(), Sizes.end(),
10829                      [](unsigned Size) {
10830                        return Size == 8 || Size == 16 || Size == 32 ||
10831                               Size == 64 || Size == 128;
10832                      }) &&
10833          "Invalid size");
10834 
10835   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10836                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
10837                          OutputBecomesInput);
10838 }
10839 
10840 /// Mangle the parameter part of the vector function name according to
10841 /// their OpenMP classification. The mangling function is defined in
10842 /// section 3.5 of the AAVFABI.
10843 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10844   SmallString<256> Buffer;
10845   llvm::raw_svector_ostream Out(Buffer);
10846   for (const auto &ParamAttr : ParamAttrs) {
10847     switch (ParamAttr.Kind) {
10848     case LinearWithVarStride:
10849       Out << "ls" << ParamAttr.StrideOrArg;
10850       break;
10851     case Linear:
10852       Out << 'l';
10853       // Don't print the step value if it is not present or if it is
10854       // equal to 1.
10855       if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1)
10856         Out << ParamAttr.StrideOrArg;
10857       break;
10858     case Uniform:
10859       Out << 'u';
10860       break;
10861     case Vector:
10862       Out << 'v';
10863       break;
10864     }
10865 
10866     if (!!ParamAttr.Alignment)
10867       Out << 'a' << ParamAttr.Alignment;
10868   }
10869 
10870   return std::string(Out.str());
10871 }
10872 
10873 // Function used to add the attribute. The parameter `VLEN` is
10874 // templated to allow the use of "x" when targeting scalable functions
10875 // for SVE.
10876 template <typename T>
10877 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10878                                  char ISA, StringRef ParSeq,
10879                                  StringRef MangledName, bool OutputBecomesInput,
10880                                  llvm::Function *Fn) {
10881   SmallString<256> Buffer;
10882   llvm::raw_svector_ostream Out(Buffer);
10883   Out << Prefix << ISA << LMask << VLEN;
10884   if (OutputBecomesInput)
10885     Out << "v";
10886   Out << ParSeq << "_" << MangledName;
10887   Fn->addFnAttr(Out.str());
10888 }
10889 
10890 // Helper function to generate the Advanced SIMD names depending on
10891 // the value of the NDS when simdlen is not present.
10892 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10893                                       StringRef Prefix, char ISA,
10894                                       StringRef ParSeq, StringRef MangledName,
10895                                       bool OutputBecomesInput,
10896                                       llvm::Function *Fn) {
10897   switch (NDS) {
10898   case 8:
10899     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10900                          OutputBecomesInput, Fn);
10901     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10902                          OutputBecomesInput, Fn);
10903     break;
10904   case 16:
10905     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10906                          OutputBecomesInput, Fn);
10907     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10908                          OutputBecomesInput, Fn);
10909     break;
10910   case 32:
10911     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10912                          OutputBecomesInput, Fn);
10913     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10914                          OutputBecomesInput, Fn);
10915     break;
10916   case 64:
10917   case 128:
10918     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10919                          OutputBecomesInput, Fn);
10920     break;
10921   default:
10922     llvm_unreachable("Scalar type is too wide.");
10923   }
10924 }
10925 
10926 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
10927 static void emitAArch64DeclareSimdFunction(
10928     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
10929     ArrayRef<ParamAttrTy> ParamAttrs,
10930     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
10931     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
10932 
10933   // Get basic data for building the vector signature.
10934   const auto Data = getNDSWDS(FD, ParamAttrs);
10935   const unsigned NDS = std::get<0>(Data);
10936   const unsigned WDS = std::get<1>(Data);
10937   const bool OutputBecomesInput = std::get<2>(Data);
10938 
10939   // Check the values provided via `simdlen` by the user.
10940   // 1. A `simdlen(1)` doesn't produce vector signatures,
10941   if (UserVLEN == 1) {
10942     unsigned DiagID = CGM.getDiags().getCustomDiagID(
10943         DiagnosticsEngine::Warning,
10944         "The clause simdlen(1) has no effect when targeting aarch64.");
10945     CGM.getDiags().Report(SLoc, DiagID);
10946     return;
10947   }
10948 
10949   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
10950   // Advanced SIMD output.
10951   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
10952     unsigned DiagID = CGM.getDiags().getCustomDiagID(
10953         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
10954                                     "power of 2 when targeting Advanced SIMD.");
10955     CGM.getDiags().Report(SLoc, DiagID);
10956     return;
10957   }
10958 
10959   // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
10960   // limits.
10961   if (ISA == 's' && UserVLEN != 0) {
10962     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
10963       unsigned DiagID = CGM.getDiags().getCustomDiagID(
10964           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
10965                                       "lanes in the architectural constraints "
10966                                       "for SVE (min is 128-bit, max is "
10967                                       "2048-bit, by steps of 128-bit)");
10968       CGM.getDiags().Report(SLoc, DiagID) << WDS;
10969       return;
10970     }
10971   }
10972 
10973   // Sort out parameter sequence.
10974   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
10975   StringRef Prefix = "_ZGV";
10976   // Generate simdlen from user input (if any).
10977   if (UserVLEN) {
10978     if (ISA == 's') {
10979       // SVE generates only a masked function.
10980       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10981                            OutputBecomesInput, Fn);
10982     } else {
10983       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10984       // Advanced SIMD generates one or two functions, depending on
10985       // the `[not]inbranch` clause.
10986       switch (State) {
10987       case OMPDeclareSimdDeclAttr::BS_Undefined:
10988         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10989                              OutputBecomesInput, Fn);
10990         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10991                              OutputBecomesInput, Fn);
10992         break;
10993       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10994         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10995                              OutputBecomesInput, Fn);
10996         break;
10997       case OMPDeclareSimdDeclAttr::BS_Inbranch:
10998         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10999                              OutputBecomesInput, Fn);
11000         break;
11001       }
11002     }
11003   } else {
11004     // If no user simdlen is provided, follow the AAVFABI rules for
11005     // generating the vector length.
11006     if (ISA == 's') {
11007       // SVE, section 3.4.1, item 1.
11008       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
11009                            OutputBecomesInput, Fn);
11010     } else {
11011       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11012       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
11013       // two vector names depending on the use of the clause
11014       // `[not]inbranch`.
11015       switch (State) {
11016       case OMPDeclareSimdDeclAttr::BS_Undefined:
11017         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11018                                   OutputBecomesInput, Fn);
11019         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11020                                   OutputBecomesInput, Fn);
11021         break;
11022       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11023         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11024                                   OutputBecomesInput, Fn);
11025         break;
11026       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11027         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11028                                   OutputBecomesInput, Fn);
11029         break;
11030       }
11031     }
11032   }
11033 }
11034 
/// Attaches vector-variant name attributes to \p Fn for every
/// OMPDeclareSimdDeclAttr found on \p FD, walking from its most recent
/// declaration back through all previous declarations.
///
/// For each attribute the uniform, aligned and linear clauses are turned into
/// a per-parameter ParamAttrTy array, the optional simdlen is evaluated, and
/// the result is handed to the x86 or AArch64 name generator. Nothing is
/// emitted for other targets.
void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
                                              llvm::Function *Fn) {
  ASTContext &C = CGM.getContext();
  FD = FD->getMostRecentDecl();
  // Map params to their positions in function decl.
  // For C++ methods position 0 is keyed by the declaration itself and is
  // used for 'this' expressions; explicit parameters follow.
  llvm::DenseMap<const Decl *, unsigned> ParamPositions;
  if (isa<CXXMethodDecl>(FD))
    ParamPositions.try_emplace(FD, 0);
  unsigned ParamPos = ParamPositions.size();
  for (const ParmVarDecl *P : FD->parameters()) {
    ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
    ++ParamPos;
  }
  while (FD) {
    for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
      // One entry per known position; every entry starts out as Vector.
      llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
      // Mark uniform parameters.
      for (const Expr *E : Attr->uniforms()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        if (isa<CXXThisExpr>(E)) {
          // NOTE(review): FD changes as the loop walks previous declarations,
          // while only the most recent declaration was registered above;
          // confirm the lookup behaves as intended for older declarations.
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
        }
        ParamAttrs[Pos].Kind = Uniform;
      }
      // Get alignment info.
      // NI walks the alignment expressions in lockstep with aligneds().
      auto NI = Attr->alignments_begin();
      for (const Expr *E : Attr->aligneds()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        QualType ParmTy;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          ParmTy = E->getType();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
          ParmTy = PVD->getType();
        }
        // Use the explicit alignment when one is given; otherwise fall back
        // to the default OpenMP SIMD alignment of the parameter type,
        // converted from bits to char units.
        ParamAttrs[Pos].Alignment =
            (*NI)
                ? (*NI)->EvaluateKnownConstInt(C)
                : llvm::APSInt::getUnsigned(
                      C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
                          .getQuantity());
        ++NI;
      }
      // Mark linear parameters.
      // SI (steps) and MI (modifiers) advance in lockstep with linears(); MI
      // is advanced but never read in this function.
      auto SI = Attr->steps_begin();
      auto MI = Attr->modifiers_begin();
      for (const Expr *E : Attr->linears()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
        }
        ParamAttrTy &ParamAttr = ParamAttrs[Pos];
        ParamAttr.Kind = Linear;
        if (*SI) {
          Expr::EvalResult Result;
          if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
            // The step is not an integer constant: treat it as a reference to
            // another parameter and record that parameter's position.
            // NOTE(review): cast<> is used as an if-condition here; confirm
            // that dyn_cast<> was not intended.
            if (const auto *DRE =
                    cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
              if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
                ParamAttr.Kind = LinearWithVarStride;
                ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
                    ParamPositions[StridePVD->getCanonicalDecl()]);
              }
            }
          } else {
            ParamAttr.StrideOrArg = Result.Val.getInt();
          }
        }
        ++SI;
        ++MI;
      }
      // VLENVal and ExprLoc stay default-constructed when there is no
      // simdlen clause.
      llvm::APSInt VLENVal;
      SourceLocation ExprLoc;
      const Expr *VLENExpr = Attr->getSimdlen();
      if (VLENExpr) {
        VLENVal = VLENExpr->EvaluateKnownConstInt(C);
        ExprLoc = VLENExpr->getExprLoc();
      }
      OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
      if (CGM.getTriple().isX86()) {
        emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
      } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
        unsigned VLEN = VLENVal.getExtValue();
        StringRef MangledName = Fn->getName();
        // Emit the 's' (SVE) and/or 'n' (Advanced SIMD) variants depending on
        // which target features are available.
        if (CGM.getTarget().hasFeature("sve"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 's', 128, Fn, ExprLoc);
        if (CGM.getTarget().hasFeature("neon"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 'n', 128, Fn, ExprLoc);
      }
    }
    FD = FD->getPreviousDecl();
  }
}
11144 
11145 namespace {
11146 /// Cleanup action for doacross support.
11147 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11148 public:
11149   static const int DoacrossFinArgs = 2;
11150 
11151 private:
11152   llvm::FunctionCallee RTLFn;
11153   llvm::Value *Args[DoacrossFinArgs];
11154 
11155 public:
11156   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11157                     ArrayRef<llvm::Value *> CallArgs)
11158       : RTLFn(RTLFn) {
11159     assert(CallArgs.size() == DoacrossFinArgs);
11160     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11161   }
11162   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11163     if (!CGF.HaveInsertPoint())
11164       return;
11165     CGF.EmitRuntimeCall(RTLFn, Args);
11166   }
11167 };
11168 } // namespace
11169 
11170 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
11171                                        const OMPLoopDirective &D,
11172                                        ArrayRef<Expr *> NumIterations) {
11173   if (!CGF.HaveInsertPoint())
11174     return;
11175 
11176   ASTContext &C = CGM.getContext();
11177   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
11178   RecordDecl *RD;
11179   if (KmpDimTy.isNull()) {
11180     // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
11181     //  kmp_int64 lo; // lower
11182     //  kmp_int64 up; // upper
11183     //  kmp_int64 st; // stride
11184     // };
11185     RD = C.buildImplicitRecord("kmp_dim");
11186     RD->startDefinition();
11187     addFieldToRecordDecl(C, RD, Int64Ty);
11188     addFieldToRecordDecl(C, RD, Int64Ty);
11189     addFieldToRecordDecl(C, RD, Int64Ty);
11190     RD->completeDefinition();
11191     KmpDimTy = C.getRecordType(RD);
11192   } else {
11193     RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
11194   }
11195   llvm::APInt Size(/*numBits=*/32, NumIterations.size());
11196   QualType ArrayTy =
11197       C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);
11198 
11199   Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
11200   CGF.EmitNullInitialization(DimsAddr, ArrayTy);
11201   enum { LowerFD = 0, UpperFD, StrideFD };
11202   // Fill dims with data.
11203   for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
11204     LValue DimsLVal = CGF.MakeAddrLValue(
11205         CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
11206     // dims.upper = num_iterations;
11207     LValue UpperLVal = CGF.EmitLValueForField(
11208         DimsLVal, *std::next(RD->field_begin(), UpperFD));
11209     llvm::Value *NumIterVal =
11210         CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]),
11211                                  D.getNumIterations()->getType(), Int64Ty,
11212                                  D.getNumIterations()->getExprLoc());
11213     CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
11214     // dims.stride = 1;
11215     LValue StrideLVal = CGF.EmitLValueForField(
11216         DimsLVal, *std::next(RD->field_begin(), StrideFD));
11217     CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
11218                           StrideLVal);
11219   }
11220 
11221   // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
11222   // kmp_int32 num_dims, struct kmp_dim * dims);
11223   llvm::Value *Args[] = {
11224       emitUpdateLocation(CGF, D.getBeginLoc()),
11225       getThreadID(CGF, D.getBeginLoc()),
11226       llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
11227       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11228           CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
11229           CGM.VoidPtrTy)};
11230 
11231   llvm::FunctionCallee RTLFn =
11232       createRuntimeFunction(OMPRTL__kmpc_doacross_init);
11233   CGF.EmitRuntimeCall(RTLFn, Args);
11234   llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
11235       emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
11236   llvm::FunctionCallee FiniRTLFn =
11237       createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
11238   CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11239                                              llvm::makeArrayRef(FiniArgs));
11240 }
11241 
11242 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11243                                           const OMPDependClause *C) {
11244   QualType Int64Ty =
11245       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11246   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11247   QualType ArrayTy = CGM.getContext().getConstantArrayType(
11248       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
11249   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11250   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11251     const Expr *CounterVal = C->getLoopData(I);
11252     assert(CounterVal);
11253     llvm::Value *CntVal = CGF.EmitScalarConversion(
11254         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11255         CounterVal->getExprLoc());
11256     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11257                           /*Volatile=*/false, Int64Ty);
11258   }
11259   llvm::Value *Args[] = {
11260       emitUpdateLocation(CGF, C->getBeginLoc()),
11261       getThreadID(CGF, C->getBeginLoc()),
11262       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
11263   llvm::FunctionCallee RTLFn;
11264   if (C->getDependencyKind() == OMPC_DEPEND_source) {
11265     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
11266   } else {
11267     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
11268     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
11269   }
11270   CGF.EmitRuntimeCall(RTLFn, Args);
11271 }
11272 
11273 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11274                                llvm::FunctionCallee Callee,
11275                                ArrayRef<llvm::Value *> Args) const {
11276   assert(Loc.isValid() && "Outlined function call location must be valid.");
11277   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11278 
11279   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11280     if (Fn->doesNotThrow()) {
11281       CGF.EmitNounwindRuntimeCall(Fn, Args);
11282       return;
11283     }
11284   }
11285   CGF.EmitRuntimeCall(Callee, Args);
11286 }
11287 
/// Emits a call to the outlined function \p OutlinedFn with \p Args at
/// \p Loc. This implementation simply forwards to emitCall.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
11293 
11294 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11295   if (const auto *FD = dyn_cast<FunctionDecl>(D))
11296     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11297       HasEmittedDeclareTargetRegion = true;
11298 }
11299 
/// Returns the address of the local variable that holds \p NativeParam in
/// \p CGF. \p TargetParam is not used by this implementation.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
11305 
11306 namespace {
11307 /// Cleanup action for allocate support.
11308 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
11309 public:
11310   static const int CleanupArgs = 3;
11311 
11312 private:
11313   llvm::FunctionCallee RTLFn;
11314   llvm::Value *Args[CleanupArgs];
11315 
11316 public:
11317   OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
11318                        ArrayRef<llvm::Value *> CallArgs)
11319       : RTLFn(RTLFn) {
11320     assert(CallArgs.size() == CleanupArgs &&
11321            "Size of arguments does not match.");
11322     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11323   }
11324   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11325     if (!CGF.HaveInsertPoint())
11326       return;
11327     CGF.EmitRuntimeCall(RTLFn, Args);
11328   }
11329 };
11330 } // namespace
11331 
11332 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
11333                                                    const VarDecl *VD) {
11334   if (!VD)
11335     return Address::invalid();
11336   const VarDecl *CVD = VD->getCanonicalDecl();
11337   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
11338     return Address::invalid();
11339   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
11340   // Use the default allocation.
11341   if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
11342       !AA->getAllocator())
11343     return Address::invalid();
11344   llvm::Value *Size;
11345   CharUnits Align = CGM.getContext().getDeclAlign(CVD);
11346   if (CVD->getType()->isVariablyModifiedType()) {
11347     Size = CGF.getTypeSize(CVD->getType());
11348     // Align the size: ((size + align - 1) / align) * align
11349     Size = CGF.Builder.CreateNUWAdd(
11350         Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
11351     Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
11352     Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
11353   } else {
11354     CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
11355     Size = CGM.getSize(Sz.alignTo(Align));
11356   }
11357   llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
11358   assert(AA->getAllocator() &&
11359          "Expected allocator expression for non-default allocator.");
11360   llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
11361   // According to the standard, the original allocator type is a enum (integer).
11362   // Convert to pointer type, if required.
11363   if (Allocator->getType()->isIntegerTy())
11364     Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
11365   else if (Allocator->getType()->isPointerTy())
11366     Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
11367                                                                 CGM.VoidPtrTy);
11368   llvm::Value *Args[] = {ThreadID, Size, Allocator};
11369 
11370   llvm::Value *Addr =
11371       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args,
11372                           getName({CVD->getName(), ".void.addr"}));
11373   llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
11374                                                               Allocator};
11375   llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);
11376 
11377   CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11378                                                 llvm::makeArrayRef(FiniArgs));
11379   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11380       Addr,
11381       CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
11382       getName({CVD->getName(), ".addr"}));
11383   return Address(Addr, Align);
11384 }
11385 
11386 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11387     CodeGenModule &CGM, const OMPLoopDirective &S)
11388     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11389   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11390   if (!NeedToPush)
11391     return;
11392   NontemporalDeclsSet &DS =
11393       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11394   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11395     for (const Stmt *Ref : C->private_refs()) {
11396       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11397       const ValueDecl *VD;
11398       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11399         VD = DRE->getDecl();
11400       } else {
11401         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11402         assert((ME->isImplicitCXXThis() ||
11403                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11404                "Expected member of current class.");
11405         VD = ME->getMemberDecl();
11406       }
11407       DS.insert(VD);
11408     }
11409   }
11410 }
11411 
11412 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11413   if (!NeedToPush)
11414     return;
11415   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11416 }
11417 
11418 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11419   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11420 
11421   return llvm::any_of(
11422       CGM.getOpenMPRuntime().NontemporalDeclsStack,
11423       [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
11424 }
11425 
11426 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11427     const OMPExecutableDirective &S,
11428     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11429     const {
11430   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11431   // Vars in target/task regions must be excluded completely.
11432   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11433       isOpenMPTaskingDirective(S.getDirectiveKind())) {
11434     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11435     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11436     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11437     for (const CapturedStmt::Capture &Cap : CS->captures()) {
11438       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11439         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11440     }
11441   }
11442   // Exclude vars in private clauses.
11443   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11444     for (const Expr *Ref : C->varlists()) {
11445       if (!Ref->getType()->isScalarType())
11446         continue;
11447       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11448       if (!DRE)
11449         continue;
11450       NeedToCheckForLPCs.insert(DRE->getDecl());
11451     }
11452   }
11453   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11454     for (const Expr *Ref : C->varlists()) {
11455       if (!Ref->getType()->isScalarType())
11456         continue;
11457       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11458       if (!DRE)
11459         continue;
11460       NeedToCheckForLPCs.insert(DRE->getDecl());
11461     }
11462   }
11463   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11464     for (const Expr *Ref : C->varlists()) {
11465       if (!Ref->getType()->isScalarType())
11466         continue;
11467       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11468       if (!DRE)
11469         continue;
11470       NeedToCheckForLPCs.insert(DRE->getDecl());
11471     }
11472   }
11473   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11474     for (const Expr *Ref : C->varlists()) {
11475       if (!Ref->getType()->isScalarType())
11476         continue;
11477       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11478       if (!DRE)
11479         continue;
11480       NeedToCheckForLPCs.insert(DRE->getDecl());
11481     }
11482   }
11483   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11484     for (const Expr *Ref : C->varlists()) {
11485       if (!Ref->getType()->isScalarType())
11486         continue;
11487       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11488       if (!DRE)
11489         continue;
11490       NeedToCheckForLPCs.insert(DRE->getDecl());
11491     }
11492   }
11493   for (const Decl *VD : NeedToCheckForLPCs) {
11494     for (const LastprivateConditionalData &Data :
11495          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
11496       if (Data.DeclToUniqueName.count(VD) > 0) {
11497         if (!Data.Disabled)
11498           NeedToAddForLPCsAsDisabled.insert(VD);
11499         break;
11500       }
11501     }
11502   }
11503 }
11504 
11505 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11506     CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
11507     : CGM(CGF.CGM),
11508       Action((CGM.getLangOpts().OpenMP >= 50 &&
11509               llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
11510                            [](const OMPLastprivateClause *C) {
11511                              return C->getKind() ==
11512                                     OMPC_LASTPRIVATE_conditional;
11513                            }))
11514                  ? ActionToDo::PushAsLastprivateConditional
11515                  : ActionToDo::DoNotPush) {
11516   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11517   if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
11518     return;
11519   assert(Action == ActionToDo::PushAsLastprivateConditional &&
11520          "Expected a push action.");
11521   LastprivateConditionalData &Data =
11522       CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11523   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11524     if (C->getKind() != OMPC_LASTPRIVATE_conditional)
11525       continue;
11526 
11527     for (const Expr *Ref : C->varlists()) {
11528       Data.DeclToUniqueName.insert(std::make_pair(
11529           cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
11530           SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
11531     }
11532   }
11533   Data.IVLVal = IVLVal;
11534   Data.Fn = CGF.CurFn;
11535 }
11536 
11537 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11538     CodeGenFunction &CGF, const OMPExecutableDirective &S)
11539     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
11540   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11541   if (CGM.getLangOpts().OpenMP < 50)
11542     return;
11543   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
11544   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
11545   if (!NeedToAddForLPCsAsDisabled.empty()) {
11546     Action = ActionToDo::DisableLastprivateConditional;
11547     LastprivateConditionalData &Data =
11548         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11549     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
11550       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
11551     Data.Fn = CGF.CurFn;
11552     Data.Disabled = true;
11553   }
11554 }
11555 
/// Builds and returns a LastprivateConditionalRAII for directive \p S using
/// the two-argument (CGF, S) constructor.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
11561 
11562 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
11563   if (CGM.getLangOpts().OpenMP < 50)
11564     return;
11565   if (Action == ActionToDo::DisableLastprivateConditional) {
11566     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11567            "Expected list of disabled private vars.");
11568     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11569   }
11570   if (Action == ActionToDo::PushAsLastprivateConditional) {
11571     assert(
11572         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11573         "Expected list of lastprivate conditional vars.");
11574     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11575   }
11576 }
11577 
11578 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
11579                                                         const VarDecl *VD) {
11580   ASTContext &C = CGM.getContext();
11581   auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
11582   if (I == LastprivateConditionalToTypes.end())
11583     I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
11584   QualType NewType;
11585   const FieldDecl *VDField;
11586   const FieldDecl *FiredField;
11587   LValue BaseLVal;
11588   auto VI = I->getSecond().find(VD);
11589   if (VI == I->getSecond().end()) {
11590     RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
11591     RD->startDefinition();
11592     VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
11593     FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
11594     RD->completeDefinition();
11595     NewType = C.getRecordType(RD);
11596     Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
11597     BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
11598     I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
11599   } else {
11600     NewType = std::get<0>(VI->getSecond());
11601     VDField = std::get<1>(VI->getSecond());
11602     FiredField = std::get<2>(VI->getSecond());
11603     BaseLVal = std::get<3>(VI->getSecond());
11604   }
11605   LValue FiredLVal =
11606       CGF.EmitLValueForField(BaseLVal, FiredField);
11607   CGF.EmitStoreOfScalar(
11608       llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
11609       FiredLVal);
11610   return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
11611 }
11612 
11613 namespace {
11614 /// Checks if the lastprivate conditional variable is referenced in LHS.
11615 class LastprivateConditionalRefChecker final
11616     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
11617   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
11618   const Expr *FoundE = nullptr;
11619   const Decl *FoundD = nullptr;
11620   StringRef UniqueDeclName;
11621   LValue IVLVal;
11622   llvm::Function *FoundFn = nullptr;
11623   SourceLocation Loc;
11624 
11625 public:
11626   bool VisitDeclRefExpr(const DeclRefExpr *E) {
11627     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11628          llvm::reverse(LPM)) {
11629       auto It = D.DeclToUniqueName.find(E->getDecl());
11630       if (It == D.DeclToUniqueName.end())
11631         continue;
11632       if (D.Disabled)
11633         return false;
11634       FoundE = E;
11635       FoundD = E->getDecl()->getCanonicalDecl();
11636       UniqueDeclName = It->second;
11637       IVLVal = D.IVLVal;
11638       FoundFn = D.Fn;
11639       break;
11640     }
11641     return FoundE == E;
11642   }
11643   bool VisitMemberExpr(const MemberExpr *E) {
11644     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
11645       return false;
11646     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11647          llvm::reverse(LPM)) {
11648       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
11649       if (It == D.DeclToUniqueName.end())
11650         continue;
11651       if (D.Disabled)
11652         return false;
11653       FoundE = E;
11654       FoundD = E->getMemberDecl()->getCanonicalDecl();
11655       UniqueDeclName = It->second;
11656       IVLVal = D.IVLVal;
11657       FoundFn = D.Fn;
11658       break;
11659     }
11660     return FoundE == E;
11661   }
11662   bool VisitStmt(const Stmt *S) {
11663     for (const Stmt *Child : S->children()) {
11664       if (!Child)
11665         continue;
11666       if (const auto *E = dyn_cast<Expr>(Child))
11667         if (!E->isGLValue())
11668           continue;
11669       if (Visit(Child))
11670         return true;
11671     }
11672     return false;
11673   }
11674   explicit LastprivateConditionalRefChecker(
11675       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
11676       : LPM(LPM) {}
11677   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
11678   getFoundData() const {
11679     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
11680   }
11681 };
11682 } // namespace
11683 
/// Emits the update of the saved "last" value of a lastprivate conditional
/// variable: if the saved iteration value is less than or equal to the
/// current one, the current iteration value and the private value are stored
/// into internal variables. The update is emitted inside a critical region
/// named \p UniqueDeclName, or inline when the OpenMPSimd language option is
/// set.
/// \param IVLVal lvalue of the iteration variable that is loaded and compared
/// with the saved iteration value.
/// \param UniqueDeclName name of the internal variable holding the last
/// value; also the base of the saved iteration variable's name and the name
/// of the critical region.
/// \param LVal lvalue of the private copy whose value is saved. Only scalar
/// and complex evaluation kinds are handled.
/// \param Loc location used for the emitted loads and the critical region.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  // The iteration value is loaded once here, outside of the region body.
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    // The comparison is signed or unsigned according to the type of the
    // iteration variable.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    // NOTE(review): the object returned by CreateEmpty is discarded at the
    // end of this statement, while other call sites in this file keep the
    // ApplyDebugLocation object in a named local. Confirm that the empty
    // location is not meant to stay active for the EmitBlock below.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
11770 
11771 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
11772                                                          const Expr *LHS) {
11773   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11774     return;
11775   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
11776   if (!Checker.Visit(LHS))
11777     return;
11778   const Expr *FoundE;
11779   const Decl *FoundD;
11780   StringRef UniqueDeclName;
11781   LValue IVLVal;
11782   llvm::Function *FoundFn;
11783   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
11784       Checker.getFoundData();
11785   if (FoundFn != CGF.CurFn) {
11786     // Special codegen for inner parallel regions.
11787     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
11788     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
11789     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
11790            "Lastprivate conditional is not found in outer region.");
11791     QualType StructTy = std::get<0>(It->getSecond());
11792     const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
11793     LValue PrivLVal = CGF.EmitLValue(FoundE);
11794     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11795         PrivLVal.getAddress(CGF),
11796         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
11797     LValue BaseLVal =
11798         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
11799     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
11800     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
11801                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
11802                         FiredLVal, llvm::AtomicOrdering::Unordered,
11803                         /*IsVolatile=*/true, /*isInit=*/false);
11804     return;
11805   }
11806 
11807   // Private address of the lastprivate conditional in the current context.
11808   // priv_a
11809   LValue LVal = CGF.EmitLValue(FoundE);
11810   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
11811                                    FoundE->getExprLoc());
11812 }
11813 
11814 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
11815     CodeGenFunction &CGF, const OMPExecutableDirective &D,
11816     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
11817   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11818     return;
11819   auto Range = llvm::reverse(LastprivateConditionalStack);
11820   auto It = llvm::find_if(
11821       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
11822   if (It == Range.end() || It->Fn != CGF.CurFn)
11823     return;
11824   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
11825   assert(LPCI != LastprivateConditionalToTypes.end() &&
11826          "Lastprivates must be registered already.");
11827   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11828   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
11829   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
11830   for (const auto &Pair : It->DeclToUniqueName) {
11831     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
11832     if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
11833       continue;
11834     auto I = LPCI->getSecond().find(Pair.first);
11835     assert(I != LPCI->getSecond().end() &&
11836            "Lastprivate must be rehistered already.");
11837     // bool Cmp = priv_a.Fired != 0;
11838     LValue BaseLVal = std::get<3>(I->getSecond());
11839     LValue FiredLVal =
11840         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
11841     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
11842     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
11843     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
11844     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
11845     // if (Cmp) {
11846     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
11847     CGF.EmitBlock(ThenBB);
11848     Address Addr = CGF.GetAddrOfLocalVar(VD);
11849     LValue LVal;
11850     if (VD->getType()->isReferenceType())
11851       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
11852                                            AlignmentSource::Decl);
11853     else
11854       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
11855                                 AlignmentSource::Decl);
11856     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
11857                                      D.getBeginLoc());
11858     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
11859     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
11860     // }
11861   }
11862 }
11863 
11864 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
11865     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
11866     SourceLocation Loc) {
11867   if (CGF.getLangOpts().OpenMP < 50)
11868     return;
11869   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
11870   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
11871          "Unknown lastprivate conditional variable.");
11872   StringRef UniqueName = It->second;
11873   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
11874   // The variable was not updated in the region - exit.
11875   if (!GV)
11876     return;
11877   LValue LPLVal = CGF.MakeAddrLValue(
11878       GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
11879   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
11880   CGF.EmitStoreOfScalar(Res, PrivLVal);
11881 }
11882 
/// SIMD-only runtime stub: outlining a parallel region is not supported.
/// Reaching this function hits llvm_unreachable; no value is returned.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11888 
/// SIMD-only runtime stub: outlining a teams region is not supported.
/// Reaching this function hits llvm_unreachable; no value is returned.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11894 
/// SIMD-only runtime stub: outlining a task region is not supported.
/// Reaching this function hits llvm_unreachable; no value is returned and
/// \p NumberOfParts is not written.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11902 
/// SIMD-only runtime stub: emitting a parallel call is not supported.
/// Reaching this function hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11910 
/// SIMD-only runtime stub: emitting a critical region is not supported.
/// Reaching this function hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11917 
/// SIMD-only runtime stub: emitting a master region is not supported.
/// Reaching this function hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11923 
/// SIMD-only runtime stub: emitting a taskyield call is not supported.
/// Reaching this function hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11928 
/// SIMD-only runtime stub: emitting a taskgroup region is not supported.
/// Reaching this function hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11934 
/// SIMD-only runtime stub: emitting a single region is not supported.
/// Reaching this function hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11942 
/// SIMD-only runtime stub: emitting an ordered region is not supported.
/// Reaching this function hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11949 
/// SIMD-only runtime stub: emitting a barrier call is not supported.
/// Reaching this function hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11957 
/// SIMD-only runtime stub: loop dispatch initialization is not supported.
/// Reaching this function hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11964 
/// SIMD-only runtime stub: static loop initialization is not supported.
/// Reaching this function hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11970 
/// SIMD-only runtime stub: static distribute initialization is not
/// supported. Reaching this function hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11976 
/// SIMD-only runtime stub: ending an ordered loop iteration is not
/// supported. Reaching this function hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11983 
/// SIMD-only runtime stub: finishing a static loop is not supported.
/// Reaching this function hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11989 
/// SIMD-only runtime stub: requesting the next loop chunk is not supported.
/// Reaching this function hits llvm_unreachable; no value is returned.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11997 
/// SIMD-only runtime stub for emitNumThreadsClause.
///
/// Marked unreachable: no code is emitted and all parameters are ignored.
11998 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
11999                                                llvm::Value *NumThreads,
12000                                                SourceLocation Loc) {
12001   llvm_unreachable("Not supported in SIMD-only mode");
12002 }
12003 
/// SIMD-only runtime stub for emitProcBindClause.
///
/// Marked unreachable: no code is emitted and all parameters are ignored.
12004 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
12005                                              ProcBindKind ProcBind,
12006                                              SourceLocation Loc) {
12007   llvm_unreachable("Not supported in SIMD-only mode");
12008 }
12009 
/// SIMD-only runtime stub for getAddrOfThreadPrivate.
///
/// Marked unreachable: all parameters are ignored and there is no return
/// statement, so no Address is ever produced by this function.
12010 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
12011                                                     const VarDecl *VD,
12012                                                     Address VDAddr,
12013                                                     SourceLocation Loc) {
12014   llvm_unreachable("Not supported in SIMD-only mode");
12015 }
12016 
/// SIMD-only runtime stub for emitThreadPrivateVarDefinition.
///
/// Marked unreachable: all parameters are ignored and there is no return
/// statement, so no llvm::Function is ever produced by this function.
12017 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
12018     const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
12019     CodeGenFunction *CGF) {
12020   llvm_unreachable("Not supported in SIMD-only mode");
12021 }
12022 
/// SIMD-only runtime stub for getAddrOfArtificialThreadPrivate.
///
/// Marked unreachable: all parameters are ignored and there is no return
/// statement, so no Address is ever produced by this function.
12023 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
12024     CodeGenFunction &CGF, QualType VarType, StringRef Name) {
12025   llvm_unreachable("Not supported in SIMD-only mode");
12026 }
12027 
/// SIMD-only runtime stub for emitFlush.
///
/// Marked unreachable: no code is emitted and all parameters are ignored.
12028 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
12029                                     ArrayRef<const Expr *> Vars,
12030                                     SourceLocation Loc,
12031                                     llvm::AtomicOrdering AO) {
12032   llvm_unreachable("Not supported in SIMD-only mode");
12033 }
12034 
/// SIMD-only runtime stub for emitTaskCall.
///
/// Marked unreachable: no code is emitted and all parameters are ignored.
12035 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
12036                                        const OMPExecutableDirective &D,
12037                                        llvm::Function *TaskFunction,
12038                                        QualType SharedsTy, Address Shareds,
12039                                        const Expr *IfCond,
12040                                        const OMPTaskDataTy &Data) {
12041   llvm_unreachable("Not supported in SIMD-only mode");
12042 }
12043 
/// SIMD-only runtime stub for emitTaskLoopCall.
///
/// Marked unreachable: no code is emitted and all parameters are ignored.
12044 void CGOpenMPSIMDRuntime::emitTaskLoopCall(
12045     CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
12046     llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
12047     const Expr *IfCond, const OMPTaskDataTy &Data) {
12048   llvm_unreachable("Not supported in SIMD-only mode");
12049 }
12050 
/// Emit a reduction in SIMD-only mode.
///
/// Unlike the neighbouring stubs, this entry point does real work: it asserts
/// that \p Options.SimpleReduction is set and then forwards every argument
/// unchanged to CGOpenMPRuntime::emitReduction.
///
/// The SimpleReduction requirement is enforced only by the assert, so it is
/// not checked in builds where assertions are compiled out.
12051 void CGOpenMPSIMDRuntime::emitReduction(
12052     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
12053     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
12054     ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
12055   assert(Options.SimpleReduction && "Only simple reduction is expected.");
        // Delegate to the CGOpenMPRuntime implementation with identical arguments.
12056   CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
12057                                  ReductionOps, Options);
12058 }
12059 
/// SIMD-only runtime stub for emitTaskReductionInit.
///
/// Marked unreachable: all parameters are ignored and there is no return
/// statement, so no llvm::Value is ever produced by this function.
12060 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
12061     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
12062     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
12063   llvm_unreachable("Not supported in SIMD-only mode");
12064 }
12065 
/// SIMD-only runtime stub for emitTaskReductionFixups.
///
/// Marked unreachable: no code is emitted and all parameters are ignored.
12066 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
12067                                                   SourceLocation Loc,
12068                                                   ReductionCodeGen &RCG,
12069                                                   unsigned N) {
12070   llvm_unreachable("Not supported in SIMD-only mode");
12071 }
12072 
/// SIMD-only runtime stub for getTaskReductionItem.
///
/// Marked unreachable: all parameters are ignored and there is no return
/// statement, so no Address is ever produced by this function.
12073 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
12074                                                   SourceLocation Loc,
12075                                                   llvm::Value *ReductionsPtr,
12076                                                   LValue SharedLVal) {
12077   llvm_unreachable("Not supported in SIMD-only mode");
12078 }
12079 
/// SIMD-only runtime stub for emitTaskwaitCall.
///
/// Marked unreachable: no code is emitted and all parameters are ignored.
12080 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
12081                                            SourceLocation Loc) {
12082   llvm_unreachable("Not supported in SIMD-only mode");
12083 }
12084 
/// SIMD-only runtime stub for emitCancellationPointCall.
///
/// Marked unreachable: no code is emitted and all parameters are ignored.
12085 void CGOpenMPSIMDRuntime::emitCancellationPointCall(
12086     CodeGenFunction &CGF, SourceLocation Loc,
12087     OpenMPDirectiveKind CancelRegion) {
12088   llvm_unreachable("Not supported in SIMD-only mode");
12089 }
12090 
/// SIMD-only runtime stub for emitCancelCall.
///
/// Marked unreachable: no code is emitted and all parameters are ignored.
12091 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
12092                                          SourceLocation Loc, const Expr *IfCond,
12093                                          OpenMPDirectiveKind CancelRegion) {
12094   llvm_unreachable("Not supported in SIMD-only mode");
12095 }
12096 
/// SIMD-only runtime stub for emitTargetOutlinedFunction.
///
/// Marked unreachable: all parameters are ignored, and the reference
/// parameters \p OutlinedFn and \p OutlinedFnID are never assigned.
12097 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
12098     const OMPExecutableDirective &D, StringRef ParentName,
12099     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
12100     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
12101   llvm_unreachable("Not supported in SIMD-only mode");
12102 }
12103 
/// SIMD-only runtime stub for emitTargetCall.
///
/// Marked unreachable: no code is emitted, all parameters are ignored, and
/// the \p SizeEmitter callback is never invoked.
12104 void CGOpenMPSIMDRuntime::emitTargetCall(
12105     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12106     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
12107     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
12108     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
12109                                      const OMPLoopDirective &D)>
12110         SizeEmitter) {
12111   llvm_unreachable("Not supported in SIMD-only mode");
12112 }
12113 
/// SIMD-only runtime stub for emitTargetFunctions.
///
/// Marked unreachable: \p GD is ignored and there is no return statement, so
/// no bool result is ever produced by this function.
12114 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
12115   llvm_unreachable("Not supported in SIMD-only mode");
12116 }
12117 
/// SIMD-only runtime stub for emitTargetGlobalVariable.
///
/// Marked unreachable: \p GD is ignored and there is no return statement, so
/// no bool result is ever produced by this function.
12118 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
12119   llvm_unreachable("Not supported in SIMD-only mode");
12120 }
12121 
/// SIMD-only implementation of emitTargetGlobal.
///
/// Unlike the neighbouring stubs this one is callable: it ignores \p GD and
/// unconditionally returns false.
/// NOTE(review): presumably false tells the caller that the declaration was
/// not handled as a target global and normal emission should proceed —
/// confirm against the CGOpenMPRuntime::emitTargetGlobal contract.
12122 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
12123   return false;
12124 }
12125 
/// SIMD-only runtime stub for emitTeamsCall.
///
/// Marked unreachable: no code is emitted and all parameters are ignored.
12126 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
12127                                         const OMPExecutableDirective &D,
12128                                         SourceLocation Loc,
12129                                         llvm::Function *OutlinedFn,
12130                                         ArrayRef<llvm::Value *> CapturedVars) {
12131   llvm_unreachable("Not supported in SIMD-only mode");
12132 }
12133 
/// SIMD-only runtime stub for emitNumTeamsClause.
///
/// Marked unreachable: no code is emitted and all parameters are ignored.
12134 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
12135                                              const Expr *NumTeams,
12136                                              const Expr *ThreadLimit,
12137                                              SourceLocation Loc) {
12138   llvm_unreachable("Not supported in SIMD-only mode");
12139 }
12140 
/// SIMD-only runtime stub for emitTargetDataCalls.
///
/// Marked unreachable: no code is emitted, all parameters are ignored, and
/// \p Info is never modified.
12141 void CGOpenMPSIMDRuntime::emitTargetDataCalls(
12142     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12143     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
12144   llvm_unreachable("Not supported in SIMD-only mode");
12145 }
12146 
/// SIMD-only runtime stub for emitTargetDataStandAloneCall.
///
/// Marked unreachable: no code is emitted and all parameters are ignored.
12147 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
12148     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12149     const Expr *Device) {
12150   llvm_unreachable("Not supported in SIMD-only mode");
12151 }
12152 
/// SIMD-only runtime stub for emitDoacrossInit.
///
/// Marked unreachable: no code is emitted and all parameters are ignored.
12153 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
12154                                            const OMPLoopDirective &D,
12155                                            ArrayRef<Expr *> NumIterations) {
12156   llvm_unreachable("Not supported in SIMD-only mode");
12157 }
12158 
/// SIMD-only runtime stub for emitDoacrossOrdered.
///
/// Marked unreachable: no code is emitted and all parameters are ignored.
12159 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12160                                               const OMPDependClause *C) {
12161   llvm_unreachable("Not supported in SIMD-only mode");
12162 }
12163 
/// SIMD-only runtime stub for translateParameter.
///
/// Marked unreachable: both parameters are ignored and there is no return
/// statement, so no VarDecl is ever produced by this function.
12164 const VarDecl *
12165 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
12166                                         const VarDecl *NativeParam) const {
12167   llvm_unreachable("Not supported in SIMD-only mode");
12168 }
12169 
/// SIMD-only runtime stub for getParameterAddress.
///
/// Marked unreachable: all parameters are ignored and there is no return
/// statement, so no Address is ever produced by this function.
12170 Address
12171 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
12172                                          const VarDecl *NativeParam,
12173                                          const VarDecl *TargetParam) const {
12174   llvm_unreachable("Not supported in SIMD-only mode");
12175 }
12176