1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/Attr.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/OpenMPClause.h"
21 #include "clang/AST/StmtOpenMP.h"
22 #include "clang/AST/StmtVisitor.h"
23 #include "clang/Basic/BitmaskEnum.h"
24 #include "clang/Basic/FileManager.h"
25 #include "clang/Basic/OpenMPKinds.h"
26 #include "clang/Basic/SourceManager.h"
27 #include "clang/CodeGen/ConstantInitBuilder.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/SetOperations.h"
30 #include "llvm/ADT/StringExtras.h"
31 #include "llvm/Bitcode/BitcodeReader.h"
32 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
33 #include "llvm/IR/Constants.h"
34 #include "llvm/IR/DerivedTypes.h"
35 #include "llvm/IR/GlobalValue.h"
36 #include "llvm/IR/Value.h"
37 #include "llvm/Support/AtomicOrdering.h"
38 #include "llvm/Support/Format.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include <cassert>
41 #include <numeric>
42 
43 using namespace clang;
44 using namespace CodeGen;
45 using namespace llvm::omp;
46 
47 namespace {
48 /// Base class for handling code generation inside OpenMP regions.
49 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
50 public:
51   /// Kinds of OpenMP regions used in codegen.
52   enum CGOpenMPRegionKind {
53     /// Region with outlined function for standalone 'parallel'
54     /// directive.
55     ParallelOutlinedRegion,
56     /// Region with outlined function for standalone 'task' directive.
57     TaskOutlinedRegion,
58     /// Region for constructs that do not require function outlining,
59     /// like 'for', 'sections', 'atomic' etc. directives.
60     InlinedRegion,
61     /// Region with outlined function for standalone 'target' directive.
62     TargetRegion,
63   };
64 
65   CGOpenMPRegionInfo(const CapturedStmt &CS,
66                      const CGOpenMPRegionKind RegionKind,
67                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
68                      bool HasCancel)
69       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
70         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
71 
72   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
73                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
74                      bool HasCancel)
75       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
76         Kind(Kind), HasCancel(HasCancel) {}
77 
78   /// Get a variable or parameter for storing global thread id
79   /// inside OpenMP construct.
80   virtual const VarDecl *getThreadIDVariable() const = 0;
81 
82   /// Emit the captured statement body.
83   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
84 
85   /// Get an LValue for the current ThreadID variable.
86   /// \return LValue for thread id variable. This LValue always has type int32*.
87   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
88 
89   virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
90 
91   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
92 
93   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
94 
95   bool hasCancel() const { return HasCancel; }
96 
97   static bool classof(const CGCapturedStmtInfo *Info) {
98     return Info->getKind() == CR_OpenMP;
99   }
100 
101   ~CGOpenMPRegionInfo() override = default;
102 
103 protected:
104   CGOpenMPRegionKind RegionKind;
105   RegionCodeGenTy CodeGen;
106   OpenMPDirectiveKind Kind;
107   bool HasCancel;
108 };
109 
110 /// API for captured statement code generation in OpenMP constructs.
111 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
112 public:
113   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
114                              const RegionCodeGenTy &CodeGen,
115                              OpenMPDirectiveKind Kind, bool HasCancel,
116                              StringRef HelperName)
117       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
118                            HasCancel),
119         ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
120     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
121   }
122 
123   /// Get a variable or parameter for storing global thread id
124   /// inside OpenMP construct.
125   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
126 
127   /// Get the name of the capture helper.
128   StringRef getHelperName() const override { return HelperName; }
129 
130   static bool classof(const CGCapturedStmtInfo *Info) {
131     return CGOpenMPRegionInfo::classof(Info) &&
132            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
133                ParallelOutlinedRegion;
134   }
135 
136 private:
137   /// A variable or parameter storing global thread id for OpenMP
138   /// constructs.
139   const VarDecl *ThreadIDVar;
140   StringRef HelperName;
141 };
142 
143 /// API for captured statement code generation in OpenMP constructs.
144 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
145 public:
146   class UntiedTaskActionTy final : public PrePostActionTy {
147     bool Untied;
148     const VarDecl *PartIDVar;
149     const RegionCodeGenTy UntiedCodeGen;
150     llvm::SwitchInst *UntiedSwitch = nullptr;
151 
152   public:
153     UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
154                        const RegionCodeGenTy &UntiedCodeGen)
155         : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
156     void Enter(CodeGenFunction &CGF) override {
157       if (Untied) {
158         // Emit task switching point.
159         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
160             CGF.GetAddrOfLocalVar(PartIDVar),
161             PartIDVar->getType()->castAs<PointerType>());
162         llvm::Value *Res =
163             CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
164         llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
165         UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
166         CGF.EmitBlock(DoneBB);
167         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
168         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
169         UntiedSwitch->addCase(CGF.Builder.getInt32(0),
170                               CGF.Builder.GetInsertBlock());
171         emitUntiedSwitch(CGF);
172       }
173     }
174     void emitUntiedSwitch(CodeGenFunction &CGF) const {
175       if (Untied) {
176         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
177             CGF.GetAddrOfLocalVar(PartIDVar),
178             PartIDVar->getType()->castAs<PointerType>());
179         CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
180                               PartIdLVal);
181         UntiedCodeGen(CGF);
182         CodeGenFunction::JumpDest CurPoint =
183             CGF.getJumpDestInCurrentScope(".untied.next.");
184         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
185         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
186         UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
187                               CGF.Builder.GetInsertBlock());
188         CGF.EmitBranchThroughCleanup(CurPoint);
189         CGF.EmitBlock(CurPoint.getBlock());
190       }
191     }
192     unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
193   };
194   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
195                                  const VarDecl *ThreadIDVar,
196                                  const RegionCodeGenTy &CodeGen,
197                                  OpenMPDirectiveKind Kind, bool HasCancel,
198                                  const UntiedTaskActionTy &Action)
199       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
200         ThreadIDVar(ThreadIDVar), Action(Action) {
201     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
202   }
203 
204   /// Get a variable or parameter for storing global thread id
205   /// inside OpenMP construct.
206   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
207 
208   /// Get an LValue for the current ThreadID variable.
209   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
210 
211   /// Get the name of the capture helper.
212   StringRef getHelperName() const override { return ".omp_outlined."; }
213 
214   void emitUntiedSwitch(CodeGenFunction &CGF) override {
215     Action.emitUntiedSwitch(CGF);
216   }
217 
218   static bool classof(const CGCapturedStmtInfo *Info) {
219     return CGOpenMPRegionInfo::classof(Info) &&
220            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
221                TaskOutlinedRegion;
222   }
223 
224 private:
225   /// A variable or parameter storing global thread id for OpenMP
226   /// constructs.
227   const VarDecl *ThreadIDVar;
228   /// Action for emitting code for untied tasks.
229   const UntiedTaskActionTy &Action;
230 };
231 
232 /// API for inlined captured statement code generation in OpenMP
233 /// constructs.
234 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
235 public:
236   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
237                             const RegionCodeGenTy &CodeGen,
238                             OpenMPDirectiveKind Kind, bool HasCancel)
239       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
240         OldCSI(OldCSI),
241         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
242 
243   // Retrieve the value of the context parameter.
244   llvm::Value *getContextValue() const override {
245     if (OuterRegionInfo)
246       return OuterRegionInfo->getContextValue();
247     llvm_unreachable("No context value for inlined OpenMP region");
248   }
249 
250   void setContextValue(llvm::Value *V) override {
251     if (OuterRegionInfo) {
252       OuterRegionInfo->setContextValue(V);
253       return;
254     }
255     llvm_unreachable("No context value for inlined OpenMP region");
256   }
257 
258   /// Lookup the captured field decl for a variable.
259   const FieldDecl *lookup(const VarDecl *VD) const override {
260     if (OuterRegionInfo)
261       return OuterRegionInfo->lookup(VD);
262     // If there is no outer outlined region,no need to lookup in a list of
263     // captured variables, we can use the original one.
264     return nullptr;
265   }
266 
267   FieldDecl *getThisFieldDecl() const override {
268     if (OuterRegionInfo)
269       return OuterRegionInfo->getThisFieldDecl();
270     return nullptr;
271   }
272 
273   /// Get a variable or parameter for storing global thread id
274   /// inside OpenMP construct.
275   const VarDecl *getThreadIDVariable() const override {
276     if (OuterRegionInfo)
277       return OuterRegionInfo->getThreadIDVariable();
278     return nullptr;
279   }
280 
281   /// Get an LValue for the current ThreadID variable.
282   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
283     if (OuterRegionInfo)
284       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
285     llvm_unreachable("No LValue for inlined OpenMP construct");
286   }
287 
288   /// Get the name of the capture helper.
289   StringRef getHelperName() const override {
290     if (auto *OuterRegionInfo = getOldCSI())
291       return OuterRegionInfo->getHelperName();
292     llvm_unreachable("No helper name for inlined OpenMP construct");
293   }
294 
295   void emitUntiedSwitch(CodeGenFunction &CGF) override {
296     if (OuterRegionInfo)
297       OuterRegionInfo->emitUntiedSwitch(CGF);
298   }
299 
300   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
301 
302   static bool classof(const CGCapturedStmtInfo *Info) {
303     return CGOpenMPRegionInfo::classof(Info) &&
304            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
305   }
306 
307   ~CGOpenMPInlinedRegionInfo() override = default;
308 
309 private:
310   /// CodeGen info about outer OpenMP region.
311   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
312   CGOpenMPRegionInfo *OuterRegionInfo;
313 };
314 
315 /// API for captured statement code generation in OpenMP target
316 /// constructs. For this captures, implicit parameters are used instead of the
317 /// captured fields. The name of the target region has to be unique in a given
318 /// application so it is provided by the client, because only the client has
319 /// the information to generate that.
320 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
321 public:
322   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
323                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
324       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
325                            /*HasCancel=*/false),
326         HelperName(HelperName) {}
327 
328   /// This is unused for target regions because each starts executing
329   /// with a single thread.
330   const VarDecl *getThreadIDVariable() const override { return nullptr; }
331 
332   /// Get the name of the capture helper.
333   StringRef getHelperName() const override { return HelperName; }
334 
335   static bool classof(const CGCapturedStmtInfo *Info) {
336     return CGOpenMPRegionInfo::classof(Info) &&
337            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
338   }
339 
340 private:
341   StringRef HelperName;
342 };
343 
344 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
345   llvm_unreachable("No codegen for expressions");
346 }
347 /// API for generation of expressions captured in a innermost OpenMP
348 /// region.
349 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
350 public:
351   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
352       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
353                                   OMPD_unknown,
354                                   /*HasCancel=*/false),
355         PrivScope(CGF) {
356     // Make sure the globals captured in the provided statement are local by
357     // using the privatization logic. We assume the same variable is not
358     // captured more than once.
359     for (const auto &C : CS.captures()) {
360       if (!C.capturesVariable() && !C.capturesVariableByCopy())
361         continue;
362 
363       const VarDecl *VD = C.getCapturedVar();
364       if (VD->isLocalVarDeclOrParm())
365         continue;
366 
367       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
368                       /*RefersToEnclosingVariableOrCapture=*/false,
369                       VD->getType().getNonReferenceType(), VK_LValue,
370                       C.getLocation());
371       PrivScope.addPrivate(
372           VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
373     }
374     (void)PrivScope.Privatize();
375   }
376 
377   /// Lookup the captured field decl for a variable.
378   const FieldDecl *lookup(const VarDecl *VD) const override {
379     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
380       return FD;
381     return nullptr;
382   }
383 
384   /// Emit the captured statement body.
385   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
386     llvm_unreachable("No body for expressions");
387   }
388 
389   /// Get a variable or parameter for storing global thread id
390   /// inside OpenMP construct.
391   const VarDecl *getThreadIDVariable() const override {
392     llvm_unreachable("No thread id for expressions");
393   }
394 
395   /// Get the name of the capture helper.
396   StringRef getHelperName() const override {
397     llvm_unreachable("No helper name for expressions");
398   }
399 
400   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
401 
402 private:
403   /// Private scope to capture global variables.
404   CodeGenFunction::OMPPrivateScope PrivScope;
405 };
406 
407 /// RAII for emitting code of OpenMP constructs.
408 class InlinedOpenMPRegionRAII {
409   CodeGenFunction &CGF;
410   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
411   FieldDecl *LambdaThisCaptureField = nullptr;
412   const CodeGen::CGBlockInfo *BlockInfo = nullptr;
413 
414 public:
415   /// Constructs region for combined constructs.
416   /// \param CodeGen Code generation sequence for combined directives. Includes
417   /// a list of functions used for code generation of implicitly inlined
418   /// regions.
419   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
420                           OpenMPDirectiveKind Kind, bool HasCancel)
421       : CGF(CGF) {
422     // Start emission for the construct.
423     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
424         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
425     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
426     LambdaThisCaptureField = CGF.LambdaThisCaptureField;
427     CGF.LambdaThisCaptureField = nullptr;
428     BlockInfo = CGF.BlockInfo;
429     CGF.BlockInfo = nullptr;
430   }
431 
432   ~InlinedOpenMPRegionRAII() {
433     // Restore original CapturedStmtInfo only if we're done with code emission.
434     auto *OldCSI =
435         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
436     delete CGF.CapturedStmtInfo;
437     CGF.CapturedStmtInfo = OldCSI;
438     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
439     CGF.LambdaThisCaptureField = LambdaThisCaptureField;
440     CGF.BlockInfo = BlockInfo;
441   }
442 };
443 
444 /// Values for bit flags used in the ident_t to describe the fields.
445 /// All enumeric elements are named and described in accordance with the code
446 /// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
447 enum OpenMPLocationFlags : unsigned {
448   /// Use trampoline for internal microtask.
449   OMP_IDENT_IMD = 0x01,
450   /// Use c-style ident structure.
451   OMP_IDENT_KMPC = 0x02,
452   /// Atomic reduction option for kmpc_reduce.
453   OMP_ATOMIC_REDUCE = 0x10,
454   /// Explicit 'barrier' directive.
455   OMP_IDENT_BARRIER_EXPL = 0x20,
456   /// Implicit barrier in code.
457   OMP_IDENT_BARRIER_IMPL = 0x40,
458   /// Implicit barrier in 'for' directive.
459   OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
460   /// Implicit barrier in 'sections' directive.
461   OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
462   /// Implicit barrier in 'single' directive.
463   OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
464   /// Call of __kmp_for_static_init for static loop.
465   OMP_IDENT_WORK_LOOP = 0x200,
466   /// Call of __kmp_for_static_init for sections.
467   OMP_IDENT_WORK_SECTIONS = 0x400,
468   /// Call of __kmp_for_static_init for distribute.
469   OMP_IDENT_WORK_DISTRIBUTE = 0x800,
470   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
471 };
472 
473 namespace {
474 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
475 /// Values for bit flags for marking which requires clauses have been used.
476 enum OpenMPOffloadingRequiresDirFlags : int64_t {
477   /// flag undefined.
478   OMP_REQ_UNDEFINED               = 0x000,
479   /// no requires clause present.
480   OMP_REQ_NONE                    = 0x001,
481   /// reverse_offload clause.
482   OMP_REQ_REVERSE_OFFLOAD         = 0x002,
483   /// unified_address clause.
484   OMP_REQ_UNIFIED_ADDRESS         = 0x004,
485   /// unified_shared_memory clause.
486   OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
487   /// dynamic_allocators clause.
488   OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
489   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
490 };
491 
492 enum OpenMPOffloadingReservedDeviceIDs {
493   /// Device ID if the device was not defined, runtime should get it
494   /// from environment variables in the spec.
495   OMP_DEVICEID_UNDEF = -1,
496 };
497 } // anonymous namespace
498 
/// Indices of the fields of the ident structure, which describes a source
/// location. All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// reserved_1: might be used in Fortran.
  IdentField_Reserved_1 = 0,
  /// flags: OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags = 1,
  /// reserved_2: not really used in Fortran any more.
  IdentField_Reserved_2 = 2,
  /// reserved_3: source[4] in Fortran, do not use for C++.
  IdentField_Reserved_3 = 3,
  /// psource: string describing the source location. The string is composed
  /// of semi-colon separated fields which describe the source file, the
  /// function and a pair of line numbers that delimit the construct.
  IdentField_PSource = 4
};
539 
/// Schedule kinds for 'omp for' loops. The enumerators are taken from the
/// enum sched_type in kmp.h.
enum OpenMPSchedType {
  // Default (unordered) versions.
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// Static with chunk adjustment (e.g., simd).
  OMP_sch_static_balanced_chunked = 45,

  // 'ordered' versions.
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,

  /// Alias for OMP_sch_static.
  OMP_sch_default = OMP_sch_static,

  // dist_schedule types.
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,

  // Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present (bit 29).
  OMP_sch_modifier_monotonic = 1 << 29,
  /// Set if the nonmonotonic schedule modifier was present (bit 30).
  OMP_sch_modifier_nonmonotonic = 1 << 30,
};
571 
572 enum OpenMPRTLFunction {
573   /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
574   /// kmpc_micro microtask, ...);
575   OMPRTL__kmpc_fork_call,
576   /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
577   /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
578   OMPRTL__kmpc_threadprivate_cached,
579   /// Call to void __kmpc_threadprivate_register( ident_t *,
580   /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
581   OMPRTL__kmpc_threadprivate_register,
  // Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
583   OMPRTL__kmpc_global_thread_num,
584   // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
585   // kmp_critical_name *crit);
586   OMPRTL__kmpc_critical,
587   // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
588   // global_tid, kmp_critical_name *crit, uintptr_t hint);
589   OMPRTL__kmpc_critical_with_hint,
590   // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
591   // kmp_critical_name *crit);
592   OMPRTL__kmpc_end_critical,
593   // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
594   // global_tid);
595   OMPRTL__kmpc_cancel_barrier,
596   // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
597   OMPRTL__kmpc_barrier,
598   // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
599   OMPRTL__kmpc_for_static_fini,
600   // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
601   // global_tid);
602   OMPRTL__kmpc_serialized_parallel,
603   // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
604   // global_tid);
605   OMPRTL__kmpc_end_serialized_parallel,
606   // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
607   // kmp_int32 num_threads);
608   OMPRTL__kmpc_push_num_threads,
609   // Call to void __kmpc_flush(ident_t *loc);
610   OMPRTL__kmpc_flush,
611   // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
612   OMPRTL__kmpc_master,
613   // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
614   OMPRTL__kmpc_end_master,
615   // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
616   // int end_part);
617   OMPRTL__kmpc_omp_taskyield,
618   // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
619   OMPRTL__kmpc_single,
620   // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
621   OMPRTL__kmpc_end_single,
622   // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
623   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
624   // kmp_routine_entry_t *task_entry);
625   OMPRTL__kmpc_omp_task_alloc,
626   // Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *,
627   // kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
628   // size_t sizeof_shareds, kmp_routine_entry_t *task_entry,
629   // kmp_int64 device_id);
630   OMPRTL__kmpc_omp_target_task_alloc,
631   // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
632   // new_task);
633   OMPRTL__kmpc_omp_task,
634   // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
635   // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
636   // kmp_int32 didit);
637   OMPRTL__kmpc_copyprivate,
638   // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
639   // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
640   // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
641   OMPRTL__kmpc_reduce,
642   // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
643   // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
644   // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
645   // *lck);
646   OMPRTL__kmpc_reduce_nowait,
647   // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
648   // kmp_critical_name *lck);
649   OMPRTL__kmpc_end_reduce,
650   // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
651   // kmp_critical_name *lck);
652   OMPRTL__kmpc_end_reduce_nowait,
653   // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
654   // kmp_task_t * new_task);
655   OMPRTL__kmpc_omp_task_begin_if0,
656   // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
657   // kmp_task_t * new_task);
658   OMPRTL__kmpc_omp_task_complete_if0,
659   // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
660   OMPRTL__kmpc_ordered,
661   // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
662   OMPRTL__kmpc_end_ordered,
663   // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
664   // global_tid);
665   OMPRTL__kmpc_omp_taskwait,
666   // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
667   OMPRTL__kmpc_taskgroup,
668   // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
669   OMPRTL__kmpc_end_taskgroup,
670   // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
671   // int proc_bind);
672   OMPRTL__kmpc_push_proc_bind,
673   // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
674   // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
675   // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
676   OMPRTL__kmpc_omp_task_with_deps,
677   // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
678   // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
679   // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
680   OMPRTL__kmpc_omp_wait_deps,
681   // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
682   // global_tid, kmp_int32 cncl_kind);
683   OMPRTL__kmpc_cancellationpoint,
684   // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
685   // kmp_int32 cncl_kind);
686   OMPRTL__kmpc_cancel,
687   // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
688   // kmp_int32 num_teams, kmp_int32 thread_limit);
689   OMPRTL__kmpc_push_num_teams,
690   // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
691   // microtask, ...);
692   OMPRTL__kmpc_fork_teams,
693   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
694   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
695   // sched, kmp_uint64 grainsize, void *task_dup);
696   OMPRTL__kmpc_taskloop,
697   // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
698   // num_dims, struct kmp_dim *dims);
699   OMPRTL__kmpc_doacross_init,
700   // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
701   OMPRTL__kmpc_doacross_fini,
702   // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
703   // *vec);
704   OMPRTL__kmpc_doacross_post,
705   // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
706   // *vec);
707   OMPRTL__kmpc_doacross_wait,
708   // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
709   // *data);
710   OMPRTL__kmpc_task_reduction_init,
711   // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
712   // *d);
713   OMPRTL__kmpc_task_reduction_get_th_data,
714   // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
715   OMPRTL__kmpc_alloc,
716   // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
717   OMPRTL__kmpc_free,
718 
719   //
720   // Offloading related calls
721   //
722   // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
723   // size);
724   OMPRTL__kmpc_push_target_tripcount,
725   // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
726   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
727   // *arg_types);
728   OMPRTL__tgt_target,
729   // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
730   // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
731   // *arg_types);
732   OMPRTL__tgt_target_nowait,
733   // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
734   // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
735   // *arg_types, int32_t num_teams, int32_t thread_limit);
736   OMPRTL__tgt_target_teams,
737   // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
738   // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
739   // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
740   OMPRTL__tgt_target_teams_nowait,
741   // Call to void __tgt_register_requires(int64_t flags);
742   OMPRTL__tgt_register_requires,
743   // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
744   // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
745   OMPRTL__tgt_target_data_begin,
746   // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
747   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
748   // *arg_types);
749   OMPRTL__tgt_target_data_begin_nowait,
750   // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
751   // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
752   OMPRTL__tgt_target_data_end,
753   // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
754   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
755   // *arg_types);
756   OMPRTL__tgt_target_data_end_nowait,
757   // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
758   // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
759   OMPRTL__tgt_target_data_update,
760   // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
761   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
762   // *arg_types);
763   OMPRTL__tgt_target_data_update_nowait,
764   // Call to int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
765   OMPRTL__tgt_mapper_num_components,
766   // Call to void __tgt_push_mapper_component(void *rt_mapper_handle, void
767   // *base, void *begin, int64_t size, int64_t type);
768   OMPRTL__tgt_push_mapper_component,
769   // Call to kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
770   // int gtid, kmp_task_t *task);
771   OMPRTL__kmpc_task_allow_completion_event,
772 };
773 
774 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
775 /// region.
776 class CleanupTy final : public EHScopeStack::Cleanup {
777   PrePostActionTy *Action;
778 
779 public:
780   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
781   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
782     if (!CGF.HaveInsertPoint())
783       return;
784     Action->Exit(CGF);
785   }
786 };
787 
788 } // anonymous namespace
789 
790 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
791   CodeGenFunction::RunCleanupsScope Scope(CGF);
792   if (PrePostAction) {
793     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
794     Callback(CodeGen, CGF, *PrePostAction);
795   } else {
796     PrePostActionTy Action;
797     Callback(CodeGen, CGF, Action);
798   }
799 }
800 
801 /// Check if the combiner is a call to UDR combiner and if it is so return the
802 /// UDR decl used for reduction.
803 static const OMPDeclareReductionDecl *
804 getReductionInit(const Expr *ReductionOp) {
805   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
806     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
807       if (const auto *DRE =
808               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
809         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
810           return DRD;
811   return nullptr;
812 }
813 
814 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
815                                              const OMPDeclareReductionDecl *DRD,
816                                              const Expr *InitOp,
817                                              Address Private, Address Original,
818                                              QualType Ty) {
819   if (DRD->getInitializer()) {
820     std::pair<llvm::Function *, llvm::Function *> Reduction =
821         CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
822     const auto *CE = cast<CallExpr>(InitOp);
823     const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
824     const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
825     const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
826     const auto *LHSDRE =
827         cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
828     const auto *RHSDRE =
829         cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
830     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
831     PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
832                             [=]() { return Private; });
833     PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
834                             [=]() { return Original; });
835     (void)PrivateScope.Privatize();
836     RValue Func = RValue::get(Reduction.second);
837     CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
838     CGF.EmitIgnoredExpr(InitOp);
839   } else {
840     llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
841     std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
842     auto *GV = new llvm::GlobalVariable(
843         CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
844         llvm::GlobalValue::PrivateLinkage, Init, Name);
845     LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
846     RValue InitRVal;
847     switch (CGF.getEvaluationKind(Ty)) {
848     case TEK_Scalar:
849       InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
850       break;
851     case TEK_Complex:
852       InitRVal =
853           RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
854       break;
855     case TEK_Aggregate:
856       InitRVal = RValue::getAggregate(LV.getAddress(CGF));
857       break;
858     }
859     OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
860     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
861     CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
862                          /*IsInitializer=*/false);
863   }
864 }
865 
866 /// Emit initialization of arrays of complex types.
867 /// \param DestAddr Address of the array.
868 /// \param Type Type of array.
869 /// \param Init Initial expression of array.
870 /// \param SrcAddr Address of the original array.
871 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
872                                  QualType Type, bool EmitDeclareReductionInit,
873                                  const Expr *Init,
874                                  const OMPDeclareReductionDecl *DRD,
875                                  Address SrcAddr = Address::invalid()) {
876   // Perform element-by-element initialization.
877   QualType ElementTy;
878 
879   // Drill down to the base element type on both arrays.
880   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
881   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
882   DestAddr =
883       CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
884   if (DRD)
885     SrcAddr =
886         CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
887 
888   llvm::Value *SrcBegin = nullptr;
889   if (DRD)
890     SrcBegin = SrcAddr.getPointer();
891   llvm::Value *DestBegin = DestAddr.getPointer();
892   // Cast from pointer to array type to pointer to single element.
893   llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
894   // The basic structure here is a while-do loop.
895   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
896   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
897   llvm::Value *IsEmpty =
898       CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
899   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
900 
901   // Enter the loop body, making that address the current address.
902   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
903   CGF.EmitBlock(BodyBB);
904 
905   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
906 
907   llvm::PHINode *SrcElementPHI = nullptr;
908   Address SrcElementCurrent = Address::invalid();
909   if (DRD) {
910     SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
911                                           "omp.arraycpy.srcElementPast");
912     SrcElementPHI->addIncoming(SrcBegin, EntryBB);
913     SrcElementCurrent =
914         Address(SrcElementPHI,
915                 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
916   }
917   llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
918       DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
919   DestElementPHI->addIncoming(DestBegin, EntryBB);
920   Address DestElementCurrent =
921       Address(DestElementPHI,
922               DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
923 
924   // Emit copy.
925   {
926     CodeGenFunction::RunCleanupsScope InitScope(CGF);
927     if (EmitDeclareReductionInit) {
928       emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
929                                        SrcElementCurrent, ElementTy);
930     } else
931       CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
932                            /*IsInitializer=*/false);
933   }
934 
935   if (DRD) {
936     // Shift the address forward by one element.
937     llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
938         SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
939     SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
940   }
941 
942   // Shift the address forward by one element.
943   llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
944       DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
945   // Check whether we've reached the end.
946   llvm::Value *Done =
947       CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
948   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
949   DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
950 
951   // Done.
952   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
953 }
954 
955 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
956   return CGF.EmitOMPSharedLValue(E);
957 }
958 
959 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
960                                             const Expr *E) {
961   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
962     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
963   return LValue();
964 }
965 
966 void ReductionCodeGen::emitAggregateInitialization(
967     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
968     const OMPDeclareReductionDecl *DRD) {
969   // Emit VarDecl with copy init for arrays.
970   // Get the address of the original variable captured in current
971   // captured region.
972   const auto *PrivateVD =
973       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
974   bool EmitDeclareReductionInit =
975       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
976   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
977                        EmitDeclareReductionInit,
978                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
979                                                 : PrivateVD->getInit(),
980                        DRD, SharedLVal.getAddress(CGF));
981 }
982 
983 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
984                                    ArrayRef<const Expr *> Privates,
985                                    ArrayRef<const Expr *> ReductionOps) {
986   ClausesData.reserve(Shareds.size());
987   SharedAddresses.reserve(Shareds.size());
988   Sizes.reserve(Shareds.size());
989   BaseDecls.reserve(Shareds.size());
990   auto IPriv = Privates.begin();
991   auto IRed = ReductionOps.begin();
992   for (const Expr *Ref : Shareds) {
993     ClausesData.emplace_back(Ref, *IPriv, *IRed);
994     std::advance(IPriv, 1);
995     std::advance(IRed, 1);
996   }
997 }
998 
999 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
1000   assert(SharedAddresses.size() == N &&
1001          "Number of generated lvalues must be exactly N.");
1002   LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
1003   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
1004   SharedAddresses.emplace_back(First, Second);
1005 }
1006 
1007 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
1008   const auto *PrivateVD =
1009       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1010   QualType PrivateType = PrivateVD->getType();
1011   bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
1012   if (!PrivateType->isVariablyModifiedType()) {
1013     Sizes.emplace_back(
1014         CGF.getTypeSize(
1015             SharedAddresses[N].first.getType().getNonReferenceType()),
1016         nullptr);
1017     return;
1018   }
1019   llvm::Value *Size;
1020   llvm::Value *SizeInChars;
1021   auto *ElemType = cast<llvm::PointerType>(
1022                        SharedAddresses[N].first.getPointer(CGF)->getType())
1023                        ->getElementType();
1024   auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
1025   if (AsArraySection) {
1026     Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(CGF),
1027                                      SharedAddresses[N].first.getPointer(CGF));
1028     Size = CGF.Builder.CreateNUWAdd(
1029         Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
1030     SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
1031   } else {
1032     SizeInChars = CGF.getTypeSize(
1033         SharedAddresses[N].first.getType().getNonReferenceType());
1034     Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
1035   }
1036   Sizes.emplace_back(SizeInChars, Size);
1037   CodeGenFunction::OpaqueValueMapping OpaqueMap(
1038       CGF,
1039       cast<OpaqueValueExpr>(
1040           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
1041       RValue::get(Size));
1042   CGF.EmitVariablyModifiedType(PrivateType);
1043 }
1044 
1045 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
1046                                          llvm::Value *Size) {
1047   const auto *PrivateVD =
1048       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1049   QualType PrivateType = PrivateVD->getType();
1050   if (!PrivateType->isVariablyModifiedType()) {
1051     assert(!Size && !Sizes[N].second &&
1052            "Size should be nullptr for non-variably modified reduction "
1053            "items.");
1054     return;
1055   }
1056   CodeGenFunction::OpaqueValueMapping OpaqueMap(
1057       CGF,
1058       cast<OpaqueValueExpr>(
1059           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
1060       RValue::get(Size));
1061   CGF.EmitVariablyModifiedType(PrivateType);
1062 }
1063 
1064 void ReductionCodeGen::emitInitialization(
1065     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
1066     llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
1067   assert(SharedAddresses.size() > N && "No variable was generated");
1068   const auto *PrivateVD =
1069       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1070   const OMPDeclareReductionDecl *DRD =
1071       getReductionInit(ClausesData[N].ReductionOp);
1072   QualType PrivateType = PrivateVD->getType();
1073   PrivateAddr = CGF.Builder.CreateElementBitCast(
1074       PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1075   QualType SharedType = SharedAddresses[N].first.getType();
1076   SharedLVal = CGF.MakeAddrLValue(
1077       CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
1078                                        CGF.ConvertTypeForMem(SharedType)),
1079       SharedType, SharedAddresses[N].first.getBaseInfo(),
1080       CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
1081   if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
1082     emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
1083   } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
1084     emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
1085                                      PrivateAddr, SharedLVal.getAddress(CGF),
1086                                      SharedLVal.getType());
1087   } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
1088              !CGF.isTrivialInitializer(PrivateVD->getInit())) {
1089     CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
1090                          PrivateVD->getType().getQualifiers(),
1091                          /*IsInitializer=*/false);
1092   }
1093 }
1094 
1095 bool ReductionCodeGen::needCleanups(unsigned N) {
1096   const auto *PrivateVD =
1097       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1098   QualType PrivateType = PrivateVD->getType();
1099   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1100   return DTorKind != QualType::DK_none;
1101 }
1102 
1103 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
1104                                     Address PrivateAddr) {
1105   const auto *PrivateVD =
1106       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1107   QualType PrivateType = PrivateVD->getType();
1108   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1109   if (needCleanups(N)) {
1110     PrivateAddr = CGF.Builder.CreateElementBitCast(
1111         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1112     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
1113   }
1114 }
1115 
1116 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
1117                           LValue BaseLV) {
1118   BaseTy = BaseTy.getNonReferenceType();
1119   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1120          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
1121     if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
1122       BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
1123     } else {
1124       LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
1125       BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
1126     }
1127     BaseTy = BaseTy->getPointeeType();
1128   }
1129   return CGF.MakeAddrLValue(
1130       CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
1131                                        CGF.ConvertTypeForMem(ElTy)),
1132       BaseLV.getType(), BaseLV.getBaseInfo(),
1133       CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
1134 }
1135 
1136 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
1137                           llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
1138                           llvm::Value *Addr) {
1139   Address Tmp = Address::invalid();
1140   Address TopTmp = Address::invalid();
1141   Address MostTopTmp = Address::invalid();
1142   BaseTy = BaseTy.getNonReferenceType();
1143   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1144          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
1145     Tmp = CGF.CreateMemTemp(BaseTy);
1146     if (TopTmp.isValid())
1147       CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
1148     else
1149       MostTopTmp = Tmp;
1150     TopTmp = Tmp;
1151     BaseTy = BaseTy->getPointeeType();
1152   }
1153   llvm::Type *Ty = BaseLVType;
1154   if (Tmp.isValid())
1155     Ty = Tmp.getElementType();
1156   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
1157   if (Tmp.isValid()) {
1158     CGF.Builder.CreateStore(Addr, Tmp);
1159     return MostTopTmp;
1160   }
1161   return Address(Addr, BaseLVAlignment);
1162 }
1163 
1164 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
1165   const VarDecl *OrigVD = nullptr;
1166   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
1167     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
1168     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
1169       Base = TempOASE->getBase()->IgnoreParenImpCasts();
1170     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1171       Base = TempASE->getBase()->IgnoreParenImpCasts();
1172     DE = cast<DeclRefExpr>(Base);
1173     OrigVD = cast<VarDecl>(DE->getDecl());
1174   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
1175     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
1176     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1177       Base = TempASE->getBase()->IgnoreParenImpCasts();
1178     DE = cast<DeclRefExpr>(Base);
1179     OrigVD = cast<VarDecl>(DE->getDecl());
1180   }
1181   return OrigVD;
1182 }
1183 
1184 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
1185                                                Address PrivateAddr) {
1186   const DeclRefExpr *DE;
1187   if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
1188     BaseDecls.emplace_back(OrigVD);
1189     LValue OriginalBaseLValue = CGF.EmitLValue(DE);
1190     LValue BaseLValue =
1191         loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1192                     OriginalBaseLValue);
1193     llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1194         BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
1195     llvm::Value *PrivatePointer =
1196         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1197             PrivateAddr.getPointer(),
1198             SharedAddresses[N].first.getAddress(CGF).getType());
1199     llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
1200     return castToBase(CGF, OrigVD->getType(),
1201                       SharedAddresses[N].first.getType(),
1202                       OriginalBaseLValue.getAddress(CGF).getType(),
1203                       OriginalBaseLValue.getAlignment(), Ptr);
1204   }
1205   BaseDecls.emplace_back(
1206       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1207   return PrivateAddr;
1208 }
1209 
1210 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1211   const OMPDeclareReductionDecl *DRD =
1212       getReductionInit(ClausesData[N].ReductionOp);
1213   return DRD && DRD->getInitializer();
1214 }
1215 
1216 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1217   return CGF.EmitLoadOfPointerLValue(
1218       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1219       getThreadIDVariable()->getType()->castAs<PointerType>());
1220 }
1221 
1222 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
1223   if (!CGF.HaveInsertPoint())
1224     return;
1225   // 1.2.2 OpenMP Language Terminology
1226   // Structured block - An executable statement with a single entry at the
1227   // top and a single exit at the bottom.
1228   // The point of exit cannot be a branch out of the structured block.
1229   // longjmp() and throw() must not violate the entry/exit criteria.
1230   CGF.EHStack.pushTerminate();
1231   CodeGen(CGF);
1232   CGF.EHStack.popTerminate();
1233 }
1234 
1235 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1236     CodeGenFunction &CGF) {
1237   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1238                             getThreadIDVariable()->getType(),
1239                             AlignmentSource::Decl);
1240 }
1241 
1242 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1243                                        QualType FieldTy) {
1244   auto *Field = FieldDecl::Create(
1245       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1246       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1247       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1248   Field->setAccess(AS_public);
1249   DC->addDecl(Field);
1250   return Field;
1251 }
1252 
1253 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
1254                                  StringRef Separator)
1255     : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
1256       OffloadEntriesInfoManager(CGM) {
1257   ASTContext &C = CGM.getContext();
1258   RecordDecl *RD = C.buildImplicitRecord("ident_t");
1259   QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1260   RD->startDefinition();
1261   // reserved_1
1262   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1263   // flags
1264   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1265   // reserved_2
1266   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1267   // reserved_3
1268   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1269   // psource
1270   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
1271   RD->completeDefinition();
1272   IdentQTy = C.getRecordType(RD);
1273   IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
1274   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1275 
1276   loadOffloadInfoMetadata();
1277 }
1278 
1279 void CGOpenMPRuntime::clear() {
1280   InternalVars.clear();
1281   // Clean non-target variable declarations possibly used only in debug info.
1282   for (const auto &Data : EmittedNonTargetVariables) {
1283     if (!Data.getValue().pointsToAliveValue())
1284       continue;
1285     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1286     if (!GV)
1287       continue;
1288     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1289       continue;
1290     GV->eraseFromParent();
1291   }
1292 }
1293 
1294 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1295   SmallString<128> Buffer;
1296   llvm::raw_svector_ostream OS(Buffer);
1297   StringRef Sep = FirstSeparator;
1298   for (StringRef Part : Parts) {
1299     OS << Sep << Part;
1300     Sep = Separator;
1301   }
1302   return std::string(OS.str());
1303 }
1304 
1305 static llvm::Function *
1306 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
1307                           const Expr *CombinerInitializer, const VarDecl *In,
1308                           const VarDecl *Out, bool IsCombiner) {
1309   // void .omp_combiner.(Ty *in, Ty *out);
1310   ASTContext &C = CGM.getContext();
1311   QualType PtrTy = C.getPointerType(Ty).withRestrict();
1312   FunctionArgList Args;
1313   ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
1314                                /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1315   ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
1316                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1317   Args.push_back(&OmpOutParm);
1318   Args.push_back(&OmpInParm);
1319   const CGFunctionInfo &FnInfo =
1320       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
1321   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
1322   std::string Name = CGM.getOpenMPRuntime().getName(
1323       {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
1324   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
1325                                     Name, &CGM.getModule());
1326   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
1327   if (CGM.getLangOpts().Optimize) {
1328     Fn->removeFnAttr(llvm::Attribute::NoInline);
1329     Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
1330     Fn->addFnAttr(llvm::Attribute::AlwaysInline);
1331   }
1332   CodeGenFunction CGF(CGM);
1333   // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
1334   // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
1335   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
1336                     Out->getLocation());
1337   CodeGenFunction::OMPPrivateScope Scope(CGF);
1338   Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
1339   Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
1340     return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
1341         .getAddress(CGF);
1342   });
1343   Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
1344   Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
1345     return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
1346         .getAddress(CGF);
1347   });
1348   (void)Scope.Privatize();
1349   if (!IsCombiner && Out->hasInit() &&
1350       !CGF.isTrivialInitializer(Out->getInit())) {
1351     CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
1352                          Out->getType().getQualifiers(),
1353                          /*IsInitializer=*/true);
1354   }
1355   if (CombinerInitializer)
1356     CGF.EmitIgnoredExpr(CombinerInitializer);
1357   Scope.ForceCleanup();
1358   CGF.FinishFunction();
1359   return Fn;
1360 }
1361 
1362 void CGOpenMPRuntime::emitUserDefinedReduction(
1363     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1364   if (UDRMap.count(D) > 0)
1365     return;
1366   llvm::Function *Combiner = emitCombinerOrInitializer(
1367       CGM, D->getType(), D->getCombiner(),
1368       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1369       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1370       /*IsCombiner=*/true);
1371   llvm::Function *Initializer = nullptr;
1372   if (const Expr *Init = D->getInitializer()) {
1373     Initializer = emitCombinerOrInitializer(
1374         CGM, D->getType(),
1375         D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1376                                                                      : nullptr,
1377         cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1378         cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1379         /*IsCombiner=*/false);
1380   }
1381   UDRMap.try_emplace(D, Combiner, Initializer);
1382   if (CGF) {
1383     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1384     Decls.second.push_back(D);
1385   }
1386 }
1387 
1388 std::pair<llvm::Function *, llvm::Function *>
1389 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1390   auto I = UDRMap.find(D);
1391   if (I != UDRMap.end())
1392     return I->second;
1393   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1394   return UDRMap.lookup(D);
1395 }
1396 
1397 namespace {
1398 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1399 // Builder if one is present.
1400 struct PushAndPopStackRAII {
1401   PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
1402                       bool HasCancel)
1403       : OMPBuilder(OMPBuilder) {
1404     if (!OMPBuilder)
1405       return;
1406 
1407     // The following callback is the crucial part of clangs cleanup process.
1408     //
1409     // NOTE:
1410     // Once the OpenMPIRBuilder is used to create parallel regions (and
1411     // similar), the cancellation destination (Dest below) is determined via
1412     // IP. That means if we have variables to finalize we split the block at IP,
1413     // use the new block (=BB) as destination to build a JumpDest (via
1414     // getJumpDestInCurrentScope(BB)) which then is fed to
1415     // EmitBranchThroughCleanup. Furthermore, there will not be the need
1416     // to push & pop an FinalizationInfo object.
1417     // The FiniCB will still be needed but at the point where the
1418     // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
1419     auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
1420       assert(IP.getBlock()->end() == IP.getPoint() &&
1421              "Clang CG should cause non-terminated block!");
1422       CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1423       CGF.Builder.restoreIP(IP);
1424       CodeGenFunction::JumpDest Dest =
1425           CGF.getOMPCancelDestination(OMPD_parallel);
1426       CGF.EmitBranchThroughCleanup(Dest);
1427     };
1428 
1429     // TODO: Remove this once we emit parallel regions through the
1430     //       OpenMPIRBuilder as it can do this setup internally.
1431     llvm::OpenMPIRBuilder::FinalizationInfo FI(
1432         {FiniCB, OMPD_parallel, HasCancel});
1433     OMPBuilder->pushFinalizationCB(std::move(FI));
1434   }
1435   ~PushAndPopStackRAII() {
1436     if (OMPBuilder)
1437       OMPBuilder->popFinalizationCB();
1438   }
1439   llvm::OpenMPIRBuilder *OMPBuilder;
1440 };
1441 } // namespace
1442 
1443 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1444     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1445     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1446     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1447   assert(ThreadIDVar->getType()->isPointerType() &&
1448          "thread id variable must be of type kmp_int32 *");
1449   CodeGenFunction CGF(CGM, true);
1450   bool HasCancel = false;
1451   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1452     HasCancel = OPD->hasCancel();
1453   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1454     HasCancel = OPSD->hasCancel();
1455   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1456     HasCancel = OPFD->hasCancel();
1457   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1458     HasCancel = OPFD->hasCancel();
1459   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1460     HasCancel = OPFD->hasCancel();
1461   else if (const auto *OPFD =
1462                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1463     HasCancel = OPFD->hasCancel();
1464   else if (const auto *OPFD =
1465                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1466     HasCancel = OPFD->hasCancel();
1467 
1468   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1469   //       parallel region to make cancellation barriers work properly.
1470   llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder();
1471   PushAndPopStackRAII PSR(OMPBuilder, CGF, HasCancel);
1472   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1473                                     HasCancel, OutlinedHelperName);
1474   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1475   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1476 }
1477 
1478 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1479     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1480     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1481   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1482   return emitParallelOrTeamsOutlinedFunction(
1483       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1484 }
1485 
1486 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1487     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1488     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1489   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1490   return emitParallelOrTeamsOutlinedFunction(
1491       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1492 }
1493 
1494 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
1495     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1496     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1497     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1498     bool Tied, unsigned &NumberOfParts) {
1499   auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1500                                               PrePostActionTy &) {
1501     llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1502     llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1503     llvm::Value *TaskArgs[] = {
1504         UpLoc, ThreadID,
1505         CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1506                                     TaskTVar->getType()->castAs<PointerType>())
1507             .getPointer(CGF)};
1508     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
1509   };
1510   CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1511                                                             UntiedCodeGen);
1512   CodeGen.setAction(Action);
1513   assert(!ThreadIDVar->getType()->isPointerType() &&
1514          "thread id variable must be of type kmp_int32 for tasks");
1515   const OpenMPDirectiveKind Region =
1516       isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1517                                                       : OMPD_task;
1518   const CapturedStmt *CS = D.getCapturedStmt(Region);
1519   bool HasCancel = false;
1520   if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
1521     HasCancel = TD->hasCancel();
1522   else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
1523     HasCancel = TD->hasCancel();
1524   else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
1525     HasCancel = TD->hasCancel();
1526   else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
1527     HasCancel = TD->hasCancel();
1528 
1529   CodeGenFunction CGF(CGM, true);
1530   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1531                                         InnermostKind, HasCancel, Action);
1532   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1533   llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
1534   if (!Tied)
1535     NumberOfParts = Action.getNumberOfParts();
1536   return Res;
1537 }
1538 
1539 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1540                              const RecordDecl *RD, const CGRecordLayout &RL,
1541                              ArrayRef<llvm::Constant *> Data) {
1542   llvm::StructType *StructTy = RL.getLLVMType();
1543   unsigned PrevIdx = 0;
1544   ConstantInitBuilder CIBuilder(CGM);
1545   auto DI = Data.begin();
1546   for (const FieldDecl *FD : RD->fields()) {
1547     unsigned Idx = RL.getLLVMFieldNo(FD);
1548     // Fill the alignment.
1549     for (unsigned I = PrevIdx; I < Idx; ++I)
1550       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1551     PrevIdx = Idx + 1;
1552     Fields.add(*DI);
1553     ++DI;
1554   }
1555 }
1556 
1557 template <class... As>
1558 static llvm::GlobalVariable *
1559 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1560                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1561                    As &&... Args) {
1562   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1563   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1564   ConstantInitBuilder CIBuilder(CGM);
1565   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1566   buildStructValue(Fields, CGM, RD, RL, Data);
1567   return Fields.finishAndCreateGlobal(
1568       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1569       std::forward<As>(Args)...);
1570 }
1571 
1572 template <typename T>
1573 static void
1574 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1575                                          ArrayRef<llvm::Constant *> Data,
1576                                          T &Parent) {
1577   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1578   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1579   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1580   buildStructValue(Fields, CGM, RD, RL, Data);
1581   Fields.finishAndAddTo(Parent);
1582 }
1583 
1584 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
1585   CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
1586   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1587   FlagsTy FlagsKey(Flags, Reserved2Flags);
1588   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
1589   if (!Entry) {
1590     if (!DefaultOpenMPPSource) {
1591       // Initialize default location for psource field of ident_t structure of
1592       // all ident_t objects. Format is ";file;function;line;column;;".
1593       // Taken from
1594       // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
1595       DefaultOpenMPPSource =
1596           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
1597       DefaultOpenMPPSource =
1598           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
1599     }
1600 
1601     llvm::Constant *Data[] = {
1602         llvm::ConstantInt::getNullValue(CGM.Int32Ty),
1603         llvm::ConstantInt::get(CGM.Int32Ty, Flags),
1604         llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
1605         llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
1606     llvm::GlobalValue *DefaultOpenMPLocation =
1607         createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
1608                            llvm::GlobalValue::PrivateLinkage);
1609     DefaultOpenMPLocation->setUnnamedAddr(
1610         llvm::GlobalValue::UnnamedAddr::Global);
1611 
1612     OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
1613   }
1614   return Address(Entry, Align);
1615 }
1616 
1617 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1618                                              bool AtCurrentPoint) {
1619   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1620   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1621 
1622   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1623   if (AtCurrentPoint) {
1624     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1625         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1626   } else {
1627     Elem.second.ServiceInsertPt =
1628         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1629     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1630   }
1631 }
1632 
1633 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1634   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1635   if (Elem.second.ServiceInsertPt) {
1636     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1637     Elem.second.ServiceInsertPt = nullptr;
1638     Ptr->eraseFromParent();
1639   }
1640 }
1641 
/// Emits code that yields a pointer to an ident_t object describing \p Loc.
///
/// OMP_IDENT_KMPC is always added to \p Flags. If no debug info is generated
/// or \p Loc is invalid, the shared default location global for those flags
/// is returned. Otherwise a per-function ident_t temporary
/// (".kmpc_loc.addr") is used: it is created on first use, initialized from
/// the default location at the service insert point, and its psource field is
/// overwritten with a ";file;function;line;column;;" string for \p Loc.
///
/// NOTE(review): the per-function temporary is initialized only when it is
/// created, using the Flags of that first call; later calls in the same
/// function reuse it whatever Flags they pass.
///
/// \returns the raw pointer to the ident_t object.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  // Reuse the ident_t temporary already recorded for this function, if any.
  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  Address LocValue = Address::invalid();
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, Align);

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // getThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    // Copy the default location into the temporary at the service insert
    // point; the guard restores the builder's insert point afterwards.
    if (!Elem.second.ServiceInsertPt)
      setLocThreadIdInsertPt(CGF);
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGF.getTypeSize(IdentQTy));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
  LValue PSource =
      CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));

  // The location string is cached per raw source-location encoding, so each
  // distinct location is built only once.
  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    // The function name is left empty when the current declaration is not a
    // FunctionDecl.
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      OS2 << FD->getQualifiedNameAsString();
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}
1702 
/// Returns a value holding the OpenMP global thread id for use in the current
/// function.
///
/// Lookup order:
///  1. a thread id already cached for CGF.CurFn in OpenMPLocThreadIDMap;
///  2. a load of the region's thread-id variable, when the current captured
///     statement info is an OpenMP region that has one and the conditions
///     below allow the load;
///  3. a call to __kmpc_global_thread_num emitted at the service insert
///     point, which is then cached for the function.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Load from the thread-id variable if any of the following holds: no
      // landing pad is required, exceptions or C++ exceptions are disabled,
      // we are emitting into the top block, the variable's pointer is not an
      // instruction, or that instruction lives in the top block or in the
      // current insert block.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region (or the thread-id variable could
  // not be used) - need to call
  // kmp_int32 __kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the call at the service insert point; the guard restores the
  // builder's insert point on scope exit.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1759 
1760 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1761   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1762   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1763     clearLocThreadIdInsertPt(CGF);
1764     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1765   }
1766   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1767     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1768       UDRMap.erase(D);
1769     FunctionUDRMap.erase(CGF.CurFn);
1770   }
1771   auto I = FunctionUDMMap.find(CGF.CurFn);
1772   if (I != FunctionUDMMap.end()) {
1773     for(const auto *D : I->second)
1774       UDMMap.erase(D);
1775     FunctionUDMMap.erase(I);
1776   }
1777   LastprivateConditionalToTypes.erase(CGF.CurFn);
1778 }
1779 
1780 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1781   return IdentTy->getPointerTo();
1782 }
1783 
1784 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1785   if (!Kmpc_MicroTy) {
1786     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1787     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1788                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1789     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1790   }
1791   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1792 }
1793 
1794 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1795   llvm::FunctionCallee RTLFn = nullptr;
1796   switch (static_cast<OpenMPRTLFunction>(Function)) {
1797   case OMPRTL__kmpc_fork_call: {
1798     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1799     // microtask, ...);
1800     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1801                                 getKmpc_MicroPointerTy()};
1802     auto *FnTy =
1803         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1804     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1805     if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
1806       if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
1807         llvm::LLVMContext &Ctx = F->getContext();
1808         llvm::MDBuilder MDB(Ctx);
1809         // Annotate the callback behavior of the __kmpc_fork_call:
1810         //  - The callback callee is argument number 2 (microtask).
1811         //  - The first two arguments of the callback callee are unknown (-1).
1812         //  - All variadic arguments to the __kmpc_fork_call are passed to the
1813         //    callback callee.
1814         F->addMetadata(
1815             llvm::LLVMContext::MD_callback,
1816             *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
1817                                         2, {-1, -1},
1818                                         /* VarArgsArePassed */ true)}));
1819       }
1820     }
1821     break;
1822   }
1823   case OMPRTL__kmpc_global_thread_num: {
1824     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1825     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1826     auto *FnTy =
1827         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1828     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1829     break;
1830   }
1831   case OMPRTL__kmpc_threadprivate_cached: {
1832     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1833     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1834     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1835                                 CGM.VoidPtrTy, CGM.SizeTy,
1836                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1837     auto *FnTy =
1838         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1839     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1840     break;
1841   }
1842   case OMPRTL__kmpc_critical: {
1843     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1844     // kmp_critical_name *crit);
1845     llvm::Type *TypeParams[] = {
1846         getIdentTyPointerTy(), CGM.Int32Ty,
1847         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1848     auto *FnTy =
1849         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1850     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1851     break;
1852   }
1853   case OMPRTL__kmpc_critical_with_hint: {
1854     // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1855     // kmp_critical_name *crit, uintptr_t hint);
1856     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1857                                 llvm::PointerType::getUnqual(KmpCriticalNameTy),
1858                                 CGM.IntPtrTy};
1859     auto *FnTy =
1860         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1861     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1862     break;
1863   }
1864   case OMPRTL__kmpc_threadprivate_register: {
1865     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1866     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1867     // typedef void *(*kmpc_ctor)(void *);
1868     auto *KmpcCtorTy =
1869         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1870                                 /*isVarArg*/ false)->getPointerTo();
1871     // typedef void *(*kmpc_cctor)(void *, void *);
1872     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1873     auto *KmpcCopyCtorTy =
1874         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1875                                 /*isVarArg*/ false)
1876             ->getPointerTo();
1877     // typedef void (*kmpc_dtor)(void *);
1878     auto *KmpcDtorTy =
1879         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1880             ->getPointerTo();
1881     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1882                               KmpcCopyCtorTy, KmpcDtorTy};
1883     auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1884                                         /*isVarArg*/ false);
1885     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1886     break;
1887   }
1888   case OMPRTL__kmpc_end_critical: {
1889     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1890     // kmp_critical_name *crit);
1891     llvm::Type *TypeParams[] = {
1892         getIdentTyPointerTy(), CGM.Int32Ty,
1893         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1894     auto *FnTy =
1895         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1896     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1897     break;
1898   }
1899   case OMPRTL__kmpc_cancel_barrier: {
1900     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1901     // global_tid);
1902     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1903     auto *FnTy =
1904         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1905     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1906     break;
1907   }
1908   case OMPRTL__kmpc_barrier: {
1909     // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1910     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1911     auto *FnTy =
1912         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1913     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1914     break;
1915   }
1916   case OMPRTL__kmpc_for_static_fini: {
1917     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1918     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1919     auto *FnTy =
1920         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1921     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1922     break;
1923   }
1924   case OMPRTL__kmpc_push_num_threads: {
1925     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1926     // kmp_int32 num_threads)
1927     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1928                                 CGM.Int32Ty};
1929     auto *FnTy =
1930         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1931     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1932     break;
1933   }
1934   case OMPRTL__kmpc_serialized_parallel: {
1935     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1936     // global_tid);
1937     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1938     auto *FnTy =
1939         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1940     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1941     break;
1942   }
1943   case OMPRTL__kmpc_end_serialized_parallel: {
1944     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1945     // global_tid);
1946     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1947     auto *FnTy =
1948         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1949     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1950     break;
1951   }
1952   case OMPRTL__kmpc_flush: {
1953     // Build void __kmpc_flush(ident_t *loc);
1954     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1955     auto *FnTy =
1956         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1957     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1958     break;
1959   }
1960   case OMPRTL__kmpc_master: {
1961     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1962     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1963     auto *FnTy =
1964         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1965     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
1966     break;
1967   }
1968   case OMPRTL__kmpc_end_master: {
1969     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
1970     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1971     auto *FnTy =
1972         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1973     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
1974     break;
1975   }
1976   case OMPRTL__kmpc_omp_taskyield: {
1977     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
1978     // int end_part);
1979     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1980     auto *FnTy =
1981         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1982     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
1983     break;
1984   }
1985   case OMPRTL__kmpc_single: {
1986     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
1987     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1988     auto *FnTy =
1989         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1990     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
1991     break;
1992   }
1993   case OMPRTL__kmpc_end_single: {
1994     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
1995     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1996     auto *FnTy =
1997         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1998     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
1999     break;
2000   }
2001   case OMPRTL__kmpc_omp_task_alloc: {
2002     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
2003     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
2004     // kmp_routine_entry_t *task_entry);
2005     assert(KmpRoutineEntryPtrTy != nullptr &&
2006            "Type kmp_routine_entry_t must be created.");
2007     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2008                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
2009     // Return void * and then cast to particular kmp_task_t type.
2010     auto *FnTy =
2011         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2012     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
2013     break;
2014   }
2015   case OMPRTL__kmpc_omp_target_task_alloc: {
2016     // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32 gtid,
2017     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
2018     // kmp_routine_entry_t *task_entry, kmp_int64 device_id);
2019     assert(KmpRoutineEntryPtrTy != nullptr &&
2020            "Type kmp_routine_entry_t must be created.");
2021     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2022                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy,
2023                                 CGM.Int64Ty};
2024     // Return void * and then cast to particular kmp_task_t type.
2025     auto *FnTy =
2026         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2027     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_target_task_alloc");
2028     break;
2029   }
2030   case OMPRTL__kmpc_omp_task: {
2031     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2032     // *new_task);
2033     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2034                                 CGM.VoidPtrTy};
2035     auto *FnTy =
2036         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2037     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
2038     break;
2039   }
2040   case OMPRTL__kmpc_copyprivate: {
2041     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
2042     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
2043     // kmp_int32 didit);
2044     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2045     auto *CpyFnTy =
2046         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
2047     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
2048                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
2049                                 CGM.Int32Ty};
2050     auto *FnTy =
2051         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2052     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
2053     break;
2054   }
2055   case OMPRTL__kmpc_reduce: {
2056     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
2057     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
2058     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
2059     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2060     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
2061                                                /*isVarArg=*/false);
2062     llvm::Type *TypeParams[] = {
2063         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
2064         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
2065         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2066     auto *FnTy =
2067         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2068     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
2069     break;
2070   }
2071   case OMPRTL__kmpc_reduce_nowait: {
2072     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
2073     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
2074     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
2075     // *lck);
2076     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2077     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
2078                                                /*isVarArg=*/false);
2079     llvm::Type *TypeParams[] = {
2080         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
2081         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
2082         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2083     auto *FnTy =
2084         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2085     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
2086     break;
2087   }
2088   case OMPRTL__kmpc_end_reduce: {
2089     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
2090     // kmp_critical_name *lck);
2091     llvm::Type *TypeParams[] = {
2092         getIdentTyPointerTy(), CGM.Int32Ty,
2093         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2094     auto *FnTy =
2095         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2096     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
2097     break;
2098   }
2099   case OMPRTL__kmpc_end_reduce_nowait: {
2100     // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
2101     // kmp_critical_name *lck);
2102     llvm::Type *TypeParams[] = {
2103         getIdentTyPointerTy(), CGM.Int32Ty,
2104         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2105     auto *FnTy =
2106         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2107     RTLFn =
2108         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
2109     break;
2110   }
2111   case OMPRTL__kmpc_omp_task_begin_if0: {
    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
2114     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2115                                 CGM.VoidPtrTy};
2116     auto *FnTy =
2117         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2118     RTLFn =
2119         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
2120     break;
2121   }
2122   case OMPRTL__kmpc_omp_task_complete_if0: {
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
2125     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2126                                 CGM.VoidPtrTy};
2127     auto *FnTy =
2128         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2129     RTLFn = CGM.CreateRuntimeFunction(FnTy,
2130                                       /*Name=*/"__kmpc_omp_task_complete_if0");
2131     break;
2132   }
2133   case OMPRTL__kmpc_ordered: {
2134     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
2135     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2136     auto *FnTy =
2137         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2138     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
2139     break;
2140   }
2141   case OMPRTL__kmpc_end_ordered: {
2142     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
2143     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2144     auto *FnTy =
2145         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2146     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
2147     break;
2148   }
2149   case OMPRTL__kmpc_omp_taskwait: {
2150     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
2151     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2152     auto *FnTy =
2153         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2154     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
2155     break;
2156   }
2157   case OMPRTL__kmpc_taskgroup: {
2158     // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
2159     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2160     auto *FnTy =
2161         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2162     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
2163     break;
2164   }
2165   case OMPRTL__kmpc_end_taskgroup: {
2166     // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
2167     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2168     auto *FnTy =
2169         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2170     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
2171     break;
2172   }
2173   case OMPRTL__kmpc_push_proc_bind: {
2174     // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
2175     // int proc_bind)
2176     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2177     auto *FnTy =
2178         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2179     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
2180     break;
2181   }
2182   case OMPRTL__kmpc_omp_task_with_deps: {
2183     // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
2184     // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
2185     // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
2186     llvm::Type *TypeParams[] = {
2187         getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
2188         CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
2189     auto *FnTy =
2190         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2191     RTLFn =
2192         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
2193     break;
2194   }
2195   case OMPRTL__kmpc_omp_wait_deps: {
2196     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
2197     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
2198     // kmp_depend_info_t *noalias_dep_list);
2199     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2200                                 CGM.Int32Ty,           CGM.VoidPtrTy,
2201                                 CGM.Int32Ty,           CGM.VoidPtrTy};
2202     auto *FnTy =
2203         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2204     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
2205     break;
2206   }
2207   case OMPRTL__kmpc_cancellationpoint: {
2208     // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
2209     // global_tid, kmp_int32 cncl_kind)
2210     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2211     auto *FnTy =
2212         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2213     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
2214     break;
2215   }
2216   case OMPRTL__kmpc_cancel: {
2217     // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
2218     // kmp_int32 cncl_kind)
2219     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2220     auto *FnTy =
2221         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2222     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
2223     break;
2224   }
2225   case OMPRTL__kmpc_push_num_teams: {
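    // Build void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
    // kmp_int32 num_teams, kmp_int32 num_threads)
    // NOTE(review): the function type below is created with an i32 return
    // type although this comment documents a void return - confirm.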
2228     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2229         CGM.Int32Ty};
2230     auto *FnTy =
2231         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2232     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
2233     break;
2234   }
2235   case OMPRTL__kmpc_fork_teams: {
2236     // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
2237     // microtask, ...);
2238     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2239                                 getKmpc_MicroPointerTy()};
2240     auto *FnTy =
2241         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
2242     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
2243     if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
2244       if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
2245         llvm::LLVMContext &Ctx = F->getContext();
2246         llvm::MDBuilder MDB(Ctx);
2247         // Annotate the callback behavior of the __kmpc_fork_teams:
2248         //  - The callback callee is argument number 2 (microtask).
2249         //  - The first two arguments of the callback callee are unknown (-1).
2250         //  - All variadic arguments to the __kmpc_fork_teams are passed to the
2251         //    callback callee.
2252         F->addMetadata(
2253             llvm::LLVMContext::MD_callback,
2254             *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
2255                                         2, {-1, -1},
2256                                         /* VarArgsArePassed */ true)}));
2257       }
2258     }
2259     break;
2260   }
2261   case OMPRTL__kmpc_taskloop: {
2262     // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
2263     // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
2264     // sched, kmp_uint64 grainsize, void *task_dup);
2265     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2266                                 CGM.IntTy,
2267                                 CGM.VoidPtrTy,
2268                                 CGM.IntTy,
2269                                 CGM.Int64Ty->getPointerTo(),
2270                                 CGM.Int64Ty->getPointerTo(),
2271                                 CGM.Int64Ty,
2272                                 CGM.IntTy,
2273                                 CGM.IntTy,
2274                                 CGM.Int64Ty,
2275                                 CGM.VoidPtrTy};
2276     auto *FnTy =
2277         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2278     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
2279     break;
2280   }
2281   case OMPRTL__kmpc_doacross_init: {
2282     // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
2283     // num_dims, struct kmp_dim *dims);
2284     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2285                                 CGM.Int32Ty,
2286                                 CGM.Int32Ty,
2287                                 CGM.VoidPtrTy};
2288     auto *FnTy =
2289         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2290     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
2291     break;
2292   }
2293   case OMPRTL__kmpc_doacross_fini: {
2294     // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
2295     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2296     auto *FnTy =
2297         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2298     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
2299     break;
2300   }
2301   case OMPRTL__kmpc_doacross_post: {
2302     // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
2303     // *vec);
2304     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2305                                 CGM.Int64Ty->getPointerTo()};
2306     auto *FnTy =
2307         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2308     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
2309     break;
2310   }
2311   case OMPRTL__kmpc_doacross_wait: {
2312     // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
2313     // *vec);
2314     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2315                                 CGM.Int64Ty->getPointerTo()};
2316     auto *FnTy =
2317         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2318     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
2319     break;
2320   }
2321   case OMPRTL__kmpc_task_reduction_init: {
2322     // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
2323     // *data);
2324     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
2325     auto *FnTy =
2326         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2327     RTLFn =
2328         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
2329     break;
2330   }
2331   case OMPRTL__kmpc_task_reduction_get_th_data: {
2332     // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
2333     // *d);
2334     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2335     auto *FnTy =
2336         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2337     RTLFn = CGM.CreateRuntimeFunction(
2338         FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
2339     break;
2340   }
2341   case OMPRTL__kmpc_alloc: {
2342     // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t
2343     // al); omp_allocator_handle_t type is void *.
2344     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy};
2345     auto *FnTy =
2346         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2347     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc");
2348     break;
2349   }
2350   case OMPRTL__kmpc_free: {
2351     // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t
2352     // al); omp_allocator_handle_t type is void *.
2353     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2354     auto *FnTy =
2355         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2356     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free");
2357     break;
2358   }
2359   case OMPRTL__kmpc_push_target_tripcount: {
2360     // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
2361     // size);
2362     llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty};
2363     llvm::FunctionType *FnTy =
2364         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2365     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount");
2366     break;
2367   }
2368   case OMPRTL__tgt_target: {
2369     // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
2370     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2371     // *arg_types);
2372     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2373                                 CGM.VoidPtrTy,
2374                                 CGM.Int32Ty,
2375                                 CGM.VoidPtrPtrTy,
2376                                 CGM.VoidPtrPtrTy,
2377                                 CGM.Int64Ty->getPointerTo(),
2378                                 CGM.Int64Ty->getPointerTo()};
2379     auto *FnTy =
2380         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2381     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
2382     break;
2383   }
2384   case OMPRTL__tgt_target_nowait: {
2385     // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
2386     // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2387     // int64_t *arg_types);
2388     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2389                                 CGM.VoidPtrTy,
2390                                 CGM.Int32Ty,
2391                                 CGM.VoidPtrPtrTy,
2392                                 CGM.VoidPtrPtrTy,
2393                                 CGM.Int64Ty->getPointerTo(),
2394                                 CGM.Int64Ty->getPointerTo()};
2395     auto *FnTy =
2396         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2397     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
2398     break;
2399   }
2400   case OMPRTL__tgt_target_teams: {
2401     // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
2402     // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2403     // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2404     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2405                                 CGM.VoidPtrTy,
2406                                 CGM.Int32Ty,
2407                                 CGM.VoidPtrPtrTy,
2408                                 CGM.VoidPtrPtrTy,
2409                                 CGM.Int64Ty->getPointerTo(),
2410                                 CGM.Int64Ty->getPointerTo(),
2411                                 CGM.Int32Ty,
2412                                 CGM.Int32Ty};
2413     auto *FnTy =
2414         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2415     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
2416     break;
2417   }
2418   case OMPRTL__tgt_target_teams_nowait: {
2419     // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
2420     // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
2421     // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2422     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2423                                 CGM.VoidPtrTy,
2424                                 CGM.Int32Ty,
2425                                 CGM.VoidPtrPtrTy,
2426                                 CGM.VoidPtrPtrTy,
2427                                 CGM.Int64Ty->getPointerTo(),
2428                                 CGM.Int64Ty->getPointerTo(),
2429                                 CGM.Int32Ty,
2430                                 CGM.Int32Ty};
2431     auto *FnTy =
2432         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2433     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
2434     break;
2435   }
2436   case OMPRTL__tgt_register_requires: {
2437     // Build void __tgt_register_requires(int64_t flags);
2438     llvm::Type *TypeParams[] = {CGM.Int64Ty};
2439     auto *FnTy =
2440         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2441     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires");
2442     break;
2443   }
2444   case OMPRTL__tgt_target_data_begin: {
2445     // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
2446     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2447     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2448                                 CGM.Int32Ty,
2449                                 CGM.VoidPtrPtrTy,
2450                                 CGM.VoidPtrPtrTy,
2451                                 CGM.Int64Ty->getPointerTo(),
2452                                 CGM.Int64Ty->getPointerTo()};
2453     auto *FnTy =
2454         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2455     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
2456     break;
2457   }
2458   case OMPRTL__tgt_target_data_begin_nowait: {
2459     // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
2460     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2461     // *arg_types);
2462     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2463                                 CGM.Int32Ty,
2464                                 CGM.VoidPtrPtrTy,
2465                                 CGM.VoidPtrPtrTy,
2466                                 CGM.Int64Ty->getPointerTo(),
2467                                 CGM.Int64Ty->getPointerTo()};
2468     auto *FnTy =
2469         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2470     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
2471     break;
2472   }
2473   case OMPRTL__tgt_target_data_end: {
2474     // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
2475     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2476     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2477                                 CGM.Int32Ty,
2478                                 CGM.VoidPtrPtrTy,
2479                                 CGM.VoidPtrPtrTy,
2480                                 CGM.Int64Ty->getPointerTo(),
2481                                 CGM.Int64Ty->getPointerTo()};
2482     auto *FnTy =
2483         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2484     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
2485     break;
2486   }
2487   case OMPRTL__tgt_target_data_end_nowait: {
2488     // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
2489     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2490     // *arg_types);
2491     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2492                                 CGM.Int32Ty,
2493                                 CGM.VoidPtrPtrTy,
2494                                 CGM.VoidPtrPtrTy,
2495                                 CGM.Int64Ty->getPointerTo(),
2496                                 CGM.Int64Ty->getPointerTo()};
2497     auto *FnTy =
2498         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2499     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
2500     break;
2501   }
2502   case OMPRTL__tgt_target_data_update: {
2503     // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
2504     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2505     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2506                                 CGM.Int32Ty,
2507                                 CGM.VoidPtrPtrTy,
2508                                 CGM.VoidPtrPtrTy,
2509                                 CGM.Int64Ty->getPointerTo(),
2510                                 CGM.Int64Ty->getPointerTo()};
2511     auto *FnTy =
2512         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2513     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
2514     break;
2515   }
2516   case OMPRTL__tgt_target_data_update_nowait: {
2517     // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
2518     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2519     // *arg_types);
2520     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2521                                 CGM.Int32Ty,
2522                                 CGM.VoidPtrPtrTy,
2523                                 CGM.VoidPtrPtrTy,
2524                                 CGM.Int64Ty->getPointerTo(),
2525                                 CGM.Int64Ty->getPointerTo()};
2526     auto *FnTy =
2527         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2528     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
2529     break;
2530   }
2531   case OMPRTL__tgt_mapper_num_components: {
2532     // Build int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
2533     llvm::Type *TypeParams[] = {CGM.VoidPtrTy};
2534     auto *FnTy =
2535         llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false);
2536     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_mapper_num_components");
2537     break;
2538   }
2539   case OMPRTL__tgt_push_mapper_component: {
2540     // Build void __tgt_push_mapper_component(void *rt_mapper_handle, void
2541     // *base, void *begin, int64_t size, int64_t type);
2542     llvm::Type *TypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.VoidPtrTy,
2543                                 CGM.Int64Ty, CGM.Int64Ty};
2544     auto *FnTy =
2545         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2546     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_push_mapper_component");
2547     break;
2548   }
2549   case OMPRTL__kmpc_task_allow_completion_event: {
2550     // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
2551     // int gtid, kmp_task_t *task);
2552     auto *FnTy = llvm::FunctionType::get(
2553         CGM.VoidPtrTy, {getIdentTyPointerTy(), CGM.IntTy, CGM.VoidPtrTy},
2554         /*isVarArg=*/false);
2555     RTLFn =
2556         CGM.CreateRuntimeFunction(FnTy, "__kmpc_task_allow_completion_event");
2557     break;
2558   }
2559   }
2560   assert(RTLFn && "Unable to find OpenMP runtime function");
2561   return RTLFn;
2562 }
2563 
2564 llvm::FunctionCallee
2565 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
2566   assert((IVSize == 32 || IVSize == 64) &&
2567          "IV size is not compatible with the omp runtime");
2568   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
2569                                             : "__kmpc_for_static_init_4u")
2570                                 : (IVSigned ? "__kmpc_for_static_init_8"
2571                                             : "__kmpc_for_static_init_8u");
2572   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2573   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2574   llvm::Type *TypeParams[] = {
2575     getIdentTyPointerTy(),                     // loc
2576     CGM.Int32Ty,                               // tid
2577     CGM.Int32Ty,                               // schedtype
2578     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2579     PtrTy,                                     // p_lower
2580     PtrTy,                                     // p_upper
2581     PtrTy,                                     // p_stride
2582     ITy,                                       // incr
2583     ITy                                        // chunk
2584   };
2585   auto *FnTy =
2586       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2587   return CGM.CreateRuntimeFunction(FnTy, Name);
2588 }
2589 
2590 llvm::FunctionCallee
2591 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
2592   assert((IVSize == 32 || IVSize == 64) &&
2593          "IV size is not compatible with the omp runtime");
2594   StringRef Name =
2595       IVSize == 32
2596           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
2597           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
2598   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2599   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
2600                                CGM.Int32Ty,           // tid
2601                                CGM.Int32Ty,           // schedtype
2602                                ITy,                   // lower
2603                                ITy,                   // upper
2604                                ITy,                   // stride
2605                                ITy                    // chunk
2606   };
2607   auto *FnTy =
2608       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2609   return CGM.CreateRuntimeFunction(FnTy, Name);
2610 }
2611 
2612 llvm::FunctionCallee
2613 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
2614   assert((IVSize == 32 || IVSize == 64) &&
2615          "IV size is not compatible with the omp runtime");
2616   StringRef Name =
2617       IVSize == 32
2618           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
2619           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
2620   llvm::Type *TypeParams[] = {
2621       getIdentTyPointerTy(), // loc
2622       CGM.Int32Ty,           // tid
2623   };
2624   auto *FnTy =
2625       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2626   return CGM.CreateRuntimeFunction(FnTy, Name);
2627 }
2628 
2629 llvm::FunctionCallee
2630 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
2631   assert((IVSize == 32 || IVSize == 64) &&
2632          "IV size is not compatible with the omp runtime");
2633   StringRef Name =
2634       IVSize == 32
2635           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
2636           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
2637   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2638   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2639   llvm::Type *TypeParams[] = {
2640     getIdentTyPointerTy(),                     // loc
2641     CGM.Int32Ty,                               // tid
2642     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2643     PtrTy,                                     // p_lower
2644     PtrTy,                                     // p_upper
2645     PtrTy                                      // p_stride
2646   };
2647   auto *FnTy =
2648       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2649   return CGM.CreateRuntimeFunction(FnTy, Name);
2650 }
2651 
2652 /// Obtain information that uniquely identifies a target entry. This
2653 /// consists of the file and device IDs as well as line number associated with
2654 /// the relevant entry source location.
2655 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
2656                                      unsigned &DeviceID, unsigned &FileID,
2657                                      unsigned &LineNum) {
2658   SourceManager &SM = C.getSourceManager();
2659 
2660   // The loc should be always valid and have a file ID (the user cannot use
2661   // #pragma directives in macros)
2662 
2663   assert(Loc.isValid() && "Source location is expected to be always valid.");
2664 
2665   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
2666   assert(PLoc.isValid() && "Source location is expected to be always valid.");
2667 
2668   llvm::sys::fs::UniqueID ID;
2669   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
2670     SM.getDiagnostics().Report(diag::err_cannot_open_file)
2671         << PLoc.getFilename() << EC.message();
2672 
2673   DeviceID = ID.getDevice();
2674   FileID = ID.getFile();
2675   LineNum = PLoc.getLine();
2676 }
2677 
2678 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
2679   if (CGM.getLangOpts().OpenMPSimd)
2680     return Address::invalid();
2681   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
2682       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
2683   if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
2684               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
2685                HasRequiresUnifiedSharedMemory))) {
2686     SmallString<64> PtrName;
2687     {
2688       llvm::raw_svector_ostream OS(PtrName);
2689       OS << CGM.getMangledName(GlobalDecl(VD));
2690       if (!VD->isExternallyVisible()) {
2691         unsigned DeviceID, FileID, Line;
2692         getTargetEntryUniqueInfo(CGM.getContext(),
2693                                  VD->getCanonicalDecl()->getBeginLoc(),
2694                                  DeviceID, FileID, Line);
2695         OS << llvm::format("_%x", FileID);
2696       }
2697       OS << "_decl_tgt_ref_ptr";
2698     }
2699     llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
2700     if (!Ptr) {
2701       QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
2702       Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
2703                                         PtrName);
2704 
2705       auto *GV = cast<llvm::GlobalVariable>(Ptr);
2706       GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
2707 
2708       if (!CGM.getLangOpts().OpenMPIsDevice)
2709         GV->setInitializer(CGM.GetAddrOfGlobal(VD));
2710       registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
2711     }
2712     return Address(Ptr, CGM.getContext().getDeclAlign(VD));
2713   }
2714   return Address::invalid();
2715 }
2716 
2717 llvm::Constant *
2718 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
2719   assert(!CGM.getLangOpts().OpenMPUseTLS ||
2720          !CGM.getContext().getTargetInfo().isTLSSupported());
2721   // Lookup the entry, lazily creating it if necessary.
2722   std::string Suffix = getName({"cache", ""});
2723   return getOrCreateInternalVariable(
2724       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
2725 }
2726 
2727 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
2728                                                 const VarDecl *VD,
2729                                                 Address VDAddr,
2730                                                 SourceLocation Loc) {
2731   if (CGM.getLangOpts().OpenMPUseTLS &&
2732       CGM.getContext().getTargetInfo().isTLSSupported())
2733     return VDAddr;
2734 
2735   llvm::Type *VarTy = VDAddr.getElementType();
2736   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2737                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2738                                                        CGM.Int8PtrTy),
2739                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
2740                          getOrCreateThreadPrivateCache(VD)};
2741   return Address(CGF.EmitRuntimeCall(
2742       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2743                  VDAddr.getAlignment());
2744 }
2745 
2746 void CGOpenMPRuntime::emitThreadPrivateVarInit(
2747     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
2748     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
2749   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
2750   // library.
2751   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
2752   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
2753                       OMPLoc);
2754   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
2755   // to register constructor/destructor for variable.
2756   llvm::Value *Args[] = {
2757       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
2758       Ctor, CopyCtor, Dtor};
2759   CGF.EmitRuntimeCall(
2760       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
2761 }
2762 
/// Emit the helper functions that construct/destroy the threadprivate copies
/// of \p VD and register them with the OpenMP runtime.
///
/// \param VD Threadprivate variable; its definition is looked up here.
/// \param VDAddr Address of the original variable.
/// \param Loc Location used for the generated functions and runtime calls.
/// \param PerformInit If true (and compiling C++), a function that re-emits
/// the variable's initializer is generated.
/// \param CGF If non-null, the registration call is emitted into this
/// function; if null, a standalone init function is created instead.
/// \returns The standalone init function when one was created (\p CGF is null
/// and a ctor or dtor exists); nullptr in every other case.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // Nothing to register when TLS is requested and supported by the target.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Proceed only if a definition exists and its mangled name has not been
  // recorded in ThreadPrivateWithDefinition before.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      // Signature: void *ctor(void *dst); the function returns its argument.
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      // Load the destination pointer and view it as a pointer to VD's type.
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Reload the argument and store it as the function's return value.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      // Signature: void dtor(void *dst).
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Whichever of ctor/dtor was not generated is passed as a typed null
    // function pointer.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No function to emit into: wrap the registration in a new nullary
      // "__omp_threadprivate_init_" function and hand it back to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration directly into the provided function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
2882 
/// Emit and register the offload entries for the constructor and destructor
/// of the declare target variable \p VD.
///
/// \param VD Declare target variable; its definition is looked up here.
/// \param Addr Global variable that the generated ctor/dtor operate on.
/// \param PerformInit If true (and compiling C++), a ctor entry is produced.
/// \returns false when there are no offload target triples and this is not a
/// device compilation; otherwise the value of the OpenMPIsDevice language
/// option.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // No entries are produced for variables that are not declare target, that
  // are mapped as 'link', or that are mapped as 'to' under unified shared
  // memory.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Process each variable (keyed by mangled name) only once.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  // Buffer holds the common prefix; Out is scratch storage used when the
  // "_ctor"/"_dtor" names are materialized for registration below.
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the declare target variable VD (at Addr).
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // Host side: no function is generated; a private constant i8 global
      // named "<prefix>_ctor" serves as both the entry address and its ID.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the declare target
      // variable VD (at Addr).
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host side: a private constant i8 global named "<prefix>_dtor" serves
      // as both the entry address and its ID.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2997 
2998 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
2999                                                           QualType VarType,
3000                                                           StringRef Name) {
3001   std::string Suffix = getName({"artificial", ""});
3002   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
3003   llvm::Value *GAddr =
3004       getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
3005   if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
3006       CGM.getTarget().isTLSSupported()) {
3007     cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
3008     return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
3009   }
3010   std::string CacheSuffix = getName({"cache", ""});
3011   llvm::Value *Args[] = {
3012       emitUpdateLocation(CGF, SourceLocation()),
3013       getThreadID(CGF, SourceLocation()),
3014       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
3015       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
3016                                 /*isSigned=*/false),
3017       getOrCreateInternalVariable(
3018           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
3019   return Address(
3020       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3021           CGF.EmitRuntimeCall(
3022               createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
3023           VarLVType->getPointerTo(/*AddrSpace=*/0)),
3024       CGM.getContext().getTypeAlignInChars(VarType));
3025 }
3026 
3027 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
3028                                    const RegionCodeGenTy &ThenGen,
3029                                    const RegionCodeGenTy &ElseGen) {
3030   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
3031 
3032   // If the condition constant folds and can be elided, try to avoid emitting
3033   // the condition and the dead arm of the if/else.
3034   bool CondConstant;
3035   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
3036     if (CondConstant)
3037       ThenGen(CGF);
3038     else
3039       ElseGen(CGF);
3040     return;
3041   }
3042 
3043   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
3044   // emit the conditional branch.
3045   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
3046   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
3047   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
3048   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
3049 
3050   // Emit the 'then' code.
3051   CGF.EmitBlock(ThenBlock);
3052   ThenGen(CGF);
3053   CGF.EmitBranch(ContBlock);
3054   // Emit the 'else' code if present.
3055   // There is no need to emit line number for unconditional branch.
3056   (void)ApplyDebugLocation::CreateEmpty(CGF);
3057   CGF.EmitBlock(ElseBlock);
3058   ElseGen(CGF);
3059   // There is no need to emit line number for unconditional branch.
3060   (void)ApplyDebugLocation::CreateEmpty(CGF);
3061   CGF.EmitBranch(ContBlock);
3062   // Emit the continuation block for code after the if.
3063   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
3064 }
3065 
3066 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
3067                                        llvm::Function *OutlinedFn,
3068                                        ArrayRef<llvm::Value *> CapturedVars,
3069                                        const Expr *IfCond) {
3070   if (!CGF.HaveInsertPoint())
3071     return;
3072   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
3073   auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
3074                                                      PrePostActionTy &) {
3075     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
3076     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
3077     llvm::Value *Args[] = {
3078         RTLoc,
3079         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
3080         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
3081     llvm::SmallVector<llvm::Value *, 16> RealArgs;
3082     RealArgs.append(std::begin(Args), std::end(Args));
3083     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
3084 
3085     llvm::FunctionCallee RTLFn =
3086         RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
3087     CGF.EmitRuntimeCall(RTLFn, RealArgs);
3088   };
3089   auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
3090                                                           PrePostActionTy &) {
3091     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
3092     llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
3093     // Build calls:
3094     // __kmpc_serialized_parallel(&Loc, GTid);
3095     llvm::Value *Args[] = {RTLoc, ThreadID};
3096     CGF.EmitRuntimeCall(
3097         RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
3098 
3099     // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
3100     Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
3101     Address ZeroAddrBound =
3102         CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
3103                                          /*Name=*/".bound.zero.addr");
3104     CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
3105     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
3106     // ThreadId for serialized parallels is 0.
3107     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
3108     OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
3109     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
3110     RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
3111 
3112     // __kmpc_end_serialized_parallel(&Loc, GTid);
3113     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
3114     CGF.EmitRuntimeCall(
3115         RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
3116         EndArgs);
3117   };
3118   if (IfCond) {
3119     emitIfClause(CGF, IfCond, ThenGen, ElseGen);
3120   } else {
3121     RegionCodeGenTy ThenRCG(ThenGen);
3122     ThenRCG(CGF);
3123   }
3124 }
3125 
3126 // If we're inside an (outlined) parallel region, use the region info's
3127 // thread-ID variable (it is passed in a first argument of the outlined function
3128 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
3129 // regular serial code region, get thread ID by calling kmp_int32
3130 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
3131 // return the address of that temp.
3132 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
3133                                              SourceLocation Loc) {
3134   if (auto *OMPRegionInfo =
3135           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3136     if (OMPRegionInfo->getThreadIDVariable())
3137       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
3138 
3139   llvm::Value *ThreadID = getThreadID(CGF, Loc);
3140   QualType Int32Ty =
3141       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
3142   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
3143   CGF.EmitStoreOfScalar(ThreadID,
3144                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
3145 
3146   return ThreadIDTemp;
3147 }
3148 
3149 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
3150     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
3151   SmallString<256> Buffer;
3152   llvm::raw_svector_ostream Out(Buffer);
3153   Out << Name;
3154   StringRef RuntimeName = Out.str();
3155   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
3156   if (Elem.second) {
3157     assert(Elem.second->getType()->getPointerElementType() == Ty &&
3158            "OMP internal variable has different type than requested");
3159     return &*Elem.second;
3160   }
3161 
3162   return Elem.second = new llvm::GlobalVariable(
3163              CGM.getModule(), Ty, /*IsConstant*/ false,
3164              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
3165              Elem.first(), /*InsertBefore=*/nullptr,
3166              llvm::GlobalValue::NotThreadLocal, AddressSpace);
3167 }
3168 
3169 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
3170   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
3171   std::string Name = getName({Prefix, "var"});
3172   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
3173 }
3174 
3175 namespace {
3176 /// Common pre(post)-action for different OpenMP constructs.
3177 class CommonActionTy final : public PrePostActionTy {
3178   llvm::FunctionCallee EnterCallee;
3179   ArrayRef<llvm::Value *> EnterArgs;
3180   llvm::FunctionCallee ExitCallee;
3181   ArrayRef<llvm::Value *> ExitArgs;
3182   bool Conditional;
3183   llvm::BasicBlock *ContBlock = nullptr;
3184 
3185 public:
3186   CommonActionTy(llvm::FunctionCallee EnterCallee,
3187                  ArrayRef<llvm::Value *> EnterArgs,
3188                  llvm::FunctionCallee ExitCallee,
3189                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
3190       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
3191         ExitArgs(ExitArgs), Conditional(Conditional) {}
3192   void Enter(CodeGenFunction &CGF) override {
3193     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
3194     if (Conditional) {
3195       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
3196       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
3197       ContBlock = CGF.createBasicBlock("omp_if.end");
3198       // Generate the branch (If-stmt)
3199       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
3200       CGF.EmitBlock(ThenBlock);
3201     }
3202   }
3203   void Done(CodeGenFunction &CGF) {
3204     // Emit the rest of blocks/branches
3205     CGF.EmitBranch(ContBlock);
3206     CGF.EmitBlock(ContBlock, true);
3207   }
3208   void Exit(CodeGenFunction &CGF) override {
3209     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
3210   }
3211 };
3212 } // anonymous namespace
3213 
3214 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
3215                                          StringRef CriticalName,
3216                                          const RegionCodeGenTy &CriticalOpGen,
3217                                          SourceLocation Loc, const Expr *Hint) {
3218   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
3219   // CriticalOpGen();
3220   // __kmpc_end_critical(ident_t *, gtid, Lock);
3221   // Prepare arguments and build a call to __kmpc_critical
3222   if (!CGF.HaveInsertPoint())
3223     return;
3224   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3225                          getCriticalRegionLock(CriticalName)};
3226   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
3227                                                 std::end(Args));
3228   if (Hint) {
3229     EnterArgs.push_back(CGF.Builder.CreateIntCast(
3230         CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
3231   }
3232   CommonActionTy Action(
3233       createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
3234                                  : OMPRTL__kmpc_critical),
3235       EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
3236   CriticalOpGen.setAction(Action);
3237   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
3238 }
3239 
3240 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
3241                                        const RegionCodeGenTy &MasterOpGen,
3242                                        SourceLocation Loc) {
3243   if (!CGF.HaveInsertPoint())
3244     return;
3245   // if(__kmpc_master(ident_t *, gtid)) {
3246   //   MasterOpGen();
3247   //   __kmpc_end_master(ident_t *, gtid);
3248   // }
3249   // Prepare arguments and build a call to __kmpc_master
3250   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3251   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
3252                         createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
3253                         /*Conditional=*/true);
3254   MasterOpGen.setAction(Action);
3255   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
3256   Action.Done(CGF);
3257 }
3258 
3259 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
3260                                         SourceLocation Loc) {
3261   if (!CGF.HaveInsertPoint())
3262     return;
3263   llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
3264   if (OMPBuilder) {
3265     OMPBuilder->CreateTaskyield(CGF.Builder);
3266   } else {
3267     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
3268     llvm::Value *Args[] = {
3269         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3270         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
3271     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield),
3272                         Args);
3273   }
3274 
3275   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3276     Region->emitUntiedSwitch(CGF);
3277 }
3278 
3279 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
3280                                           const RegionCodeGenTy &TaskgroupOpGen,
3281                                           SourceLocation Loc) {
3282   if (!CGF.HaveInsertPoint())
3283     return;
3284   // __kmpc_taskgroup(ident_t *, gtid);
3285   // TaskgroupOpGen();
3286   // __kmpc_end_taskgroup(ident_t *, gtid);
3287   // Prepare arguments and build a call to __kmpc_taskgroup
3288   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3289   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
3290                         createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
3291                         Args);
3292   TaskgroupOpGen.setAction(Action);
3293   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
3294 }
3295 
3296 /// Given an array of pointers to variables, project the address of a
3297 /// given variable.
3298 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
3299                                       unsigned Index, const VarDecl *Var) {
3300   // Pull out the pointer to the variable.
3301   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
3302   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
3303 
3304   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
3305   Addr = CGF.Builder.CreateElementBitCast(
3306       Addr, CGF.ConvertTypeForMem(Var->getType()));
3307   return Addr;
3308 }
3309 
3310 static llvm::Value *emitCopyprivateCopyFunction(
3311     CodeGenModule &CGM, llvm::Type *ArgsType,
3312     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
3313     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
3314     SourceLocation Loc) {
3315   ASTContext &C = CGM.getContext();
3316   // void copy_func(void *LHSArg, void *RHSArg);
3317   FunctionArgList Args;
3318   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
3319                            ImplicitParamDecl::Other);
3320   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
3321                            ImplicitParamDecl::Other);
3322   Args.push_back(&LHSArg);
3323   Args.push_back(&RHSArg);
3324   const auto &CGFI =
3325       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3326   std::string Name =
3327       CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
3328   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
3329                                     llvm::GlobalValue::InternalLinkage, Name,
3330                                     &CGM.getModule());
3331   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
3332   Fn->setDoesNotRecurse();
3333   CodeGenFunction CGF(CGM);
3334   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
3335   // Dest = (void*[n])(LHSArg);
3336   // Src = (void*[n])(RHSArg);
3337   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3338       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
3339       ArgsType), CGF.getPointerAlign());
3340   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3341       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
3342       ArgsType), CGF.getPointerAlign());
3343   // *(Type0*)Dst[0] = *(Type0*)Src[0];
3344   // *(Type1*)Dst[1] = *(Type1*)Src[1];
3345   // ...
3346   // *(Typen*)Dst[n] = *(Typen*)Src[n];
3347   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
3348     const auto *DestVar =
3349         cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
3350     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
3351 
3352     const auto *SrcVar =
3353         cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
3354     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
3355 
3356     const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
3357     QualType Type = VD->getType();
3358     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
3359   }
3360   CGF.FinishFunction();
3361   return Fn;
3362 }
3363 
3364 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
3365                                        const RegionCodeGenTy &SingleOpGen,
3366                                        SourceLocation Loc,
3367                                        ArrayRef<const Expr *> CopyprivateVars,
3368                                        ArrayRef<const Expr *> SrcExprs,
3369                                        ArrayRef<const Expr *> DstExprs,
3370                                        ArrayRef<const Expr *> AssignmentOps) {
3371   if (!CGF.HaveInsertPoint())
3372     return;
3373   assert(CopyprivateVars.size() == SrcExprs.size() &&
3374          CopyprivateVars.size() == DstExprs.size() &&
3375          CopyprivateVars.size() == AssignmentOps.size());
3376   ASTContext &C = CGM.getContext();
3377   // int32 did_it = 0;
3378   // if(__kmpc_single(ident_t *, gtid)) {
3379   //   SingleOpGen();
3380   //   __kmpc_end_single(ident_t *, gtid);
3381   //   did_it = 1;
3382   // }
3383   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
3384   // <copy_func>, did_it);
3385 
3386   Address DidIt = Address::invalid();
3387   if (!CopyprivateVars.empty()) {
3388     // int32 did_it = 0;
3389     QualType KmpInt32Ty =
3390         C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3391     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
3392     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
3393   }
3394   // Prepare arguments and build a call to __kmpc_single
3395   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3396   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
3397                         createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
3398                         /*Conditional=*/true);
3399   SingleOpGen.setAction(Action);
3400   emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
3401   if (DidIt.isValid()) {
3402     // did_it = 1;
3403     CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
3404   }
3405   Action.Done(CGF);
3406   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
3407   // <copy_func>, did_it);
3408   if (DidIt.isValid()) {
3409     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
3410     QualType CopyprivateArrayTy = C.getConstantArrayType(
3411         C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
3412         /*IndexTypeQuals=*/0);
3413     // Create a list of all private variables for copyprivate.
3414     Address CopyprivateList =
3415         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
3416     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
3417       Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
3418       CGF.Builder.CreateStore(
3419           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3420               CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
3421               CGF.VoidPtrTy),
3422           Elem);
3423     }
3424     // Build function that copies private values from single region to all other
3425     // threads in the corresponding parallel region.
3426     llvm::Value *CpyFn = emitCopyprivateCopyFunction(
3427         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
3428         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
3429     llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
3430     Address CL =
3431       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
3432                                                       CGF.VoidPtrTy);
3433     llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
3434     llvm::Value *Args[] = {
3435         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
3436         getThreadID(CGF, Loc),        // i32 <gtid>
3437         BufSize,                      // size_t <buf_size>
3438         CL.getPointer(),              // void *<copyprivate list>
3439         CpyFn,                        // void (*) (void *, void *) <copy_func>
3440         DidItVal                      // i32 did_it
3441     };
3442     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
3443   }
3444 }
3445 
3446 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
3447                                         const RegionCodeGenTy &OrderedOpGen,
3448                                         SourceLocation Loc, bool IsThreads) {
3449   if (!CGF.HaveInsertPoint())
3450     return;
3451   // __kmpc_ordered(ident_t *, gtid);
3452   // OrderedOpGen();
3453   // __kmpc_end_ordered(ident_t *, gtid);
3454   // Prepare arguments and build a call to __kmpc_ordered
3455   if (IsThreads) {
3456     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3457     CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
3458                           createRuntimeFunction(OMPRTL__kmpc_end_ordered),
3459                           Args);
3460     OrderedOpGen.setAction(Action);
3461     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3462     return;
3463   }
3464   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3465 }
3466 
3467 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
3468   unsigned Flags;
3469   if (Kind == OMPD_for)
3470     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
3471   else if (Kind == OMPD_sections)
3472     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
3473   else if (Kind == OMPD_single)
3474     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
3475   else if (Kind == OMPD_barrier)
3476     Flags = OMP_IDENT_BARRIER_EXPL;
3477   else
3478     Flags = OMP_IDENT_BARRIER_IMPL;
3479   return Flags;
3480 }
3481 
3482 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
3483     CodeGenFunction &CGF, const OMPLoopDirective &S,
3484     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
3485   // Check if the loop directive is actually a doacross loop directive. In this
3486   // case choose static, 1 schedule.
3487   if (llvm::any_of(
3488           S.getClausesOfKind<OMPOrderedClause>(),
3489           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
3490     ScheduleKind = OMPC_SCHEDULE_static;
3491     // Chunk size is 1 in this case.
3492     llvm::APInt ChunkSize(32, 1);
3493     ChunkExpr = IntegerLiteral::Create(
3494         CGF.getContext(), ChunkSize,
3495         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
3496         SourceLocation());
3497   }
3498 }
3499 
3500 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
3501                                       OpenMPDirectiveKind Kind, bool EmitChecks,
3502                                       bool ForceSimpleCall) {
3503   // Check if we should use the OMPBuilder
3504   auto *OMPRegionInfo =
3505       dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
3506   llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
3507   if (OMPBuilder) {
3508     CGF.Builder.restoreIP(OMPBuilder->CreateBarrier(
3509         CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
3510     return;
3511   }
3512 
3513   if (!CGF.HaveInsertPoint())
3514     return;
3515   // Build call __kmpc_cancel_barrier(loc, thread_id);
3516   // Build call __kmpc_barrier(loc, thread_id);
3517   unsigned Flags = getDefaultFlagsForBarriers(Kind);
3518   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
3519   // thread_id);
3520   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
3521                          getThreadID(CGF, Loc)};
3522   if (OMPRegionInfo) {
3523     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
3524       llvm::Value *Result = CGF.EmitRuntimeCall(
3525           createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
3526       if (EmitChecks) {
3527         // if (__kmpc_cancel_barrier()) {
3528         //   exit from construct;
3529         // }
3530         llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
3531         llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
3532         llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
3533         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
3534         CGF.EmitBlock(ExitBB);
3535         //   exit from construct;
3536         CodeGenFunction::JumpDest CancelDestination =
3537             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
3538         CGF.EmitBranchThroughCleanup(CancelDestination);
3539         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
3540       }
3541       return;
3542     }
3543   }
3544   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
3545 }
3546 
3547 /// Map the OpenMP loop schedule to the runtime enumeration.
3548 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
3549                                           bool Chunked, bool Ordered) {
3550   switch (ScheduleKind) {
3551   case OMPC_SCHEDULE_static:
3552     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
3553                    : (Ordered ? OMP_ord_static : OMP_sch_static);
3554   case OMPC_SCHEDULE_dynamic:
3555     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
3556   case OMPC_SCHEDULE_guided:
3557     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
3558   case OMPC_SCHEDULE_runtime:
3559     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
3560   case OMPC_SCHEDULE_auto:
3561     return Ordered ? OMP_ord_auto : OMP_sch_auto;
3562   case OMPC_SCHEDULE_unknown:
3563     assert(!Chunked && "chunk was specified but schedule kind not known");
3564     return Ordered ? OMP_ord_static : OMP_sch_static;
3565   }
3566   llvm_unreachable("Unexpected runtime schedule");
3567 }
3568 
3569 /// Map the OpenMP distribute schedule to the runtime enumeration.
3570 static OpenMPSchedType
3571 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
3572   // only static is allowed for dist_schedule
3573   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
3574 }
3575 
3576 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
3577                                          bool Chunked) const {
3578   OpenMPSchedType Schedule =
3579       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3580   return Schedule == OMP_sch_static;
3581 }
3582 
3583 bool CGOpenMPRuntime::isStaticNonchunked(
3584     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3585   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3586   return Schedule == OMP_dist_sch_static;
3587 }
3588 
3589 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
3590                                       bool Chunked) const {
3591   OpenMPSchedType Schedule =
3592       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3593   return Schedule == OMP_sch_static_chunked;
3594 }
3595 
3596 bool CGOpenMPRuntime::isStaticChunked(
3597     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3598   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3599   return Schedule == OMP_dist_sch_static_chunked;
3600 }
3601 
3602 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
3603   OpenMPSchedType Schedule =
3604       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
3605   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
3606   return Schedule != OMP_sch_static;
3607 }
3608 
3609 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
3610                                   OpenMPScheduleClauseModifier M1,
3611                                   OpenMPScheduleClauseModifier M2) {
3612   int Modifier = 0;
3613   switch (M1) {
3614   case OMPC_SCHEDULE_MODIFIER_monotonic:
3615     Modifier = OMP_sch_modifier_monotonic;
3616     break;
3617   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3618     Modifier = OMP_sch_modifier_nonmonotonic;
3619     break;
3620   case OMPC_SCHEDULE_MODIFIER_simd:
3621     if (Schedule == OMP_sch_static_chunked)
3622       Schedule = OMP_sch_static_balanced_chunked;
3623     break;
3624   case OMPC_SCHEDULE_MODIFIER_last:
3625   case OMPC_SCHEDULE_MODIFIER_unknown:
3626     break;
3627   }
3628   switch (M2) {
3629   case OMPC_SCHEDULE_MODIFIER_monotonic:
3630     Modifier = OMP_sch_modifier_monotonic;
3631     break;
3632   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3633     Modifier = OMP_sch_modifier_nonmonotonic;
3634     break;
3635   case OMPC_SCHEDULE_MODIFIER_simd:
3636     if (Schedule == OMP_sch_static_chunked)
3637       Schedule = OMP_sch_static_balanced_chunked;
3638     break;
3639   case OMPC_SCHEDULE_MODIFIER_last:
3640   case OMPC_SCHEDULE_MODIFIER_unknown:
3641     break;
3642   }
3643   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
3644   // If the static schedule kind is specified or if the ordered clause is
3645   // specified, and if the nonmonotonic modifier is not specified, the effect is
3646   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
3647   // modifier is specified, the effect is as if the nonmonotonic modifier is
3648   // specified.
3649   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
3650     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
3651           Schedule == OMP_sch_static_balanced_chunked ||
3652           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
3653           Schedule == OMP_dist_sch_static_chunked ||
3654           Schedule == OMP_dist_sch_static))
3655       Modifier = OMP_sch_modifier_nonmonotonic;
3656   }
3657   return Schedule | Modifier;
3658 }
3659 
3660 void CGOpenMPRuntime::emitForDispatchInit(
3661     CodeGenFunction &CGF, SourceLocation Loc,
3662     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
3663     bool Ordered, const DispatchRTInput &DispatchValues) {
3664   if (!CGF.HaveInsertPoint())
3665     return;
3666   OpenMPSchedType Schedule = getRuntimeSchedule(
3667       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
3668   assert(Ordered ||
3669          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
3670           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
3671           Schedule != OMP_sch_static_balanced_chunked));
3672   // Call __kmpc_dispatch_init(
3673   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
3674   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
3675   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
3676 
3677   // If the Chunk was not specified in the clause - use default value 1.
3678   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
3679                                             : CGF.Builder.getIntN(IVSize, 1);
3680   llvm::Value *Args[] = {
3681       emitUpdateLocation(CGF, Loc),
3682       getThreadID(CGF, Loc),
3683       CGF.Builder.getInt32(addMonoNonMonoModifier(
3684           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
3685       DispatchValues.LB,                                     // Lower
3686       DispatchValues.UB,                                     // Upper
3687       CGF.Builder.getIntN(IVSize, 1),                        // Stride
3688       Chunk                                                  // Chunk
3689   };
3690   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
3691 }
3692 
3693 static void emitForStaticInitCall(
3694     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
3695     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
3696     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
3697     const CGOpenMPRuntime::StaticRTInput &Values) {
3698   if (!CGF.HaveInsertPoint())
3699     return;
3700 
3701   assert(!Values.Ordered);
3702   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
3703          Schedule == OMP_sch_static_balanced_chunked ||
3704          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
3705          Schedule == OMP_dist_sch_static ||
3706          Schedule == OMP_dist_sch_static_chunked);
3707 
3708   // Call __kmpc_for_static_init(
3709   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
3710   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
3711   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
3712   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
3713   llvm::Value *Chunk = Values.Chunk;
3714   if (Chunk == nullptr) {
3715     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
3716             Schedule == OMP_dist_sch_static) &&
3717            "expected static non-chunked schedule");
3718     // If the Chunk was not specified in the clause - use default value 1.
3719     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
3720   } else {
3721     assert((Schedule == OMP_sch_static_chunked ||
3722             Schedule == OMP_sch_static_balanced_chunked ||
3723             Schedule == OMP_ord_static_chunked ||
3724             Schedule == OMP_dist_sch_static_chunked) &&
3725            "expected static chunked schedule");
3726   }
3727   llvm::Value *Args[] = {
3728       UpdateLocation,
3729       ThreadId,
3730       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
3731                                                   M2)), // Schedule type
3732       Values.IL.getPointer(),                           // &isLastIter
3733       Values.LB.getPointer(),                           // &LB
3734       Values.UB.getPointer(),                           // &UB
3735       Values.ST.getPointer(),                           // &Stride
3736       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
3737       Chunk                                             // Chunk
3738   };
3739   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
3740 }
3741 
3742 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
3743                                         SourceLocation Loc,
3744                                         OpenMPDirectiveKind DKind,
3745                                         const OpenMPScheduleTy &ScheduleKind,
3746                                         const StaticRTInput &Values) {
3747   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
3748       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
3749   assert(isOpenMPWorksharingDirective(DKind) &&
3750          "Expected loop-based or sections-based directive.");
3751   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
3752                                              isOpenMPLoopDirective(DKind)
3753                                                  ? OMP_IDENT_WORK_LOOP
3754                                                  : OMP_IDENT_WORK_SECTIONS);
3755   llvm::Value *ThreadId = getThreadID(CGF, Loc);
3756   llvm::FunctionCallee StaticInitFunction =
3757       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3758   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
3759   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3760                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
3761 }
3762 
3763 void CGOpenMPRuntime::emitDistributeStaticInit(
3764     CodeGenFunction &CGF, SourceLocation Loc,
3765     OpenMPDistScheduleClauseKind SchedKind,
3766     const CGOpenMPRuntime::StaticRTInput &Values) {
3767   OpenMPSchedType ScheduleNum =
3768       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
3769   llvm::Value *UpdatedLocation =
3770       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
3771   llvm::Value *ThreadId = getThreadID(CGF, Loc);
3772   llvm::FunctionCallee StaticInitFunction =
3773       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3774   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3775                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
3776                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
3777 }
3778 
3779 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
3780                                           SourceLocation Loc,
3781                                           OpenMPDirectiveKind DKind) {
3782   if (!CGF.HaveInsertPoint())
3783     return;
3784   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
3785   llvm::Value *Args[] = {
3786       emitUpdateLocation(CGF, Loc,
3787                          isOpenMPDistributeDirective(DKind)
3788                              ? OMP_IDENT_WORK_DISTRIBUTE
3789                              : isOpenMPLoopDirective(DKind)
3790                                    ? OMP_IDENT_WORK_LOOP
3791                                    : OMP_IDENT_WORK_SECTIONS),
3792       getThreadID(CGF, Loc)};
3793   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
3794   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
3795                       Args);
3796 }
3797 
3798 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
3799                                                  SourceLocation Loc,
3800                                                  unsigned IVSize,
3801                                                  bool IVSigned) {
3802   if (!CGF.HaveInsertPoint())
3803     return;
3804   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
3805   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3806   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
3807 }
3808 
3809 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
3810                                           SourceLocation Loc, unsigned IVSize,
3811                                           bool IVSigned, Address IL,
3812                                           Address LB, Address UB,
3813                                           Address ST) {
3814   // Call __kmpc_dispatch_next(
3815   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
3816   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
3817   //          kmp_int[32|64] *p_stride);
3818   llvm::Value *Args[] = {
3819       emitUpdateLocation(CGF, Loc),
3820       getThreadID(CGF, Loc),
3821       IL.getPointer(), // &isLastIter
3822       LB.getPointer(), // &Lower
3823       UB.getPointer(), // &Upper
3824       ST.getPointer()  // &Stride
3825   };
3826   llvm::Value *Call =
3827       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
3828   return CGF.EmitScalarConversion(
3829       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
3830       CGF.getContext().BoolTy, Loc);
3831 }
3832 
3833 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
3834                                            llvm::Value *NumThreads,
3835                                            SourceLocation Loc) {
3836   if (!CGF.HaveInsertPoint())
3837     return;
3838   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
3839   llvm::Value *Args[] = {
3840       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3841       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
3842   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
3843                       Args);
3844 }
3845 
3846 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
3847                                          ProcBindKind ProcBind,
3848                                          SourceLocation Loc) {
3849   if (!CGF.HaveInsertPoint())
3850     return;
3851   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
3852   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
3853   llvm::Value *Args[] = {
3854       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3855       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
3856   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
3857 }
3858 
3859 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
3860                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
3861   llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
3862   if (OMPBuilder) {
3863     OMPBuilder->CreateFlush(CGF.Builder);
3864   } else {
3865     if (!CGF.HaveInsertPoint())
3866       return;
3867     // Build call void __kmpc_flush(ident_t *loc)
3868     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
3869                         emitUpdateLocation(CGF, Loc));
3870   }
3871 }
3872 
namespace {
/// Indexes of fields for type kmp_task_t. The enumerators carry no explicit
/// values, so they are numbered consecutively from zero in declaration order.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
3898 
3899 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3900   return OffloadEntriesTargetRegion.empty() &&
3901          OffloadEntriesDeviceGlobalVar.empty();
3902 }
3903 
3904 /// Initialize target region entry.
3905 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3906     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3907                                     StringRef ParentName, unsigned LineNum,
3908                                     unsigned Order) {
3909   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3910                                              "only required for the device "
3911                                              "code generation.");
3912   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3913       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3914                                    OMPTargetRegionEntryTargetRegion);
3915   ++OffloadingEntriesNum;
3916 }
3917 
3918 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3919     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3920                                   StringRef ParentName, unsigned LineNum,
3921                                   llvm::Constant *Addr, llvm::Constant *ID,
3922                                   OMPTargetRegionEntryKind Flags) {
3923   // If we are emitting code for a target, the entry is already initialized,
3924   // only has to be registered.
3925   if (CGM.getLangOpts().OpenMPIsDevice) {
3926     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
3927       unsigned DiagID = CGM.getDiags().getCustomDiagID(
3928           DiagnosticsEngine::Error,
3929           "Unable to find target region on line '%0' in the device code.");
3930       CGM.getDiags().Report(DiagID) << LineNum;
3931       return;
3932     }
3933     auto &Entry =
3934         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3935     assert(Entry.isValid() && "Entry not initialized!");
3936     Entry.setAddress(Addr);
3937     Entry.setID(ID);
3938     Entry.setFlags(Flags);
3939   } else {
3940     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3941     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3942     ++OffloadingEntriesNum;
3943   }
3944 }
3945 
3946 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3947     unsigned DeviceID, unsigned FileID, StringRef ParentName,
3948     unsigned LineNum) const {
3949   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3950   if (PerDevice == OffloadEntriesTargetRegion.end())
3951     return false;
3952   auto PerFile = PerDevice->second.find(FileID);
3953   if (PerFile == PerDevice->second.end())
3954     return false;
3955   auto PerParentName = PerFile->second.find(ParentName);
3956   if (PerParentName == PerFile->second.end())
3957     return false;
3958   auto PerLine = PerParentName->second.find(LineNum);
3959   if (PerLine == PerParentName->second.end())
3960     return false;
3961   // Fail if this entry is already registered.
3962   if (PerLine->second.getAddress() || PerLine->second.getID())
3963     return false;
3964   return true;
3965 }
3966 
3967 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3968     const OffloadTargetRegionEntryInfoActTy &Action) {
3969   // Scan all target region entries and perform the provided action.
3970   for (const auto &D : OffloadEntriesTargetRegion)
3971     for (const auto &F : D.second)
3972       for (const auto &P : F.second)
3973         for (const auto &L : P.second)
3974           Action(D.first, F.first, P.first(), L.first, L.second);
3975 }
3976 
3977 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3978     initializeDeviceGlobalVarEntryInfo(StringRef Name,
3979                                        OMPTargetGlobalVarEntryKind Flags,
3980                                        unsigned Order) {
3981   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3982                                              "only required for the device "
3983                                              "code generation.");
3984   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3985   ++OffloadingEntriesNum;
3986 }
3987 
3988 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3989     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3990                                      CharUnits VarSize,
3991                                      OMPTargetGlobalVarEntryKind Flags,
3992                                      llvm::GlobalValue::LinkageTypes Linkage) {
3993   if (CGM.getLangOpts().OpenMPIsDevice) {
3994     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3995     assert(Entry.isValid() && Entry.getFlags() == Flags &&
3996            "Entry not initialized!");
3997     assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3998            "Resetting with the new address.");
3999     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
4000       if (Entry.getVarSize().isZero()) {
4001         Entry.setVarSize(VarSize);
4002         Entry.setLinkage(Linkage);
4003       }
4004       return;
4005     }
4006     Entry.setVarSize(VarSize);
4007     Entry.setLinkage(Linkage);
4008     Entry.setAddress(Addr);
4009   } else {
4010     if (hasDeviceGlobalVarEntryInfo(VarName)) {
4011       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
4012       assert(Entry.isValid() && Entry.getFlags() == Flags &&
4013              "Entry not initialized!");
4014       assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
4015              "Resetting with the new address.");
4016       if (Entry.getVarSize().isZero()) {
4017         Entry.setVarSize(VarSize);
4018         Entry.setLinkage(Linkage);
4019       }
4020       return;
4021     }
4022     OffloadEntriesDeviceGlobalVar.try_emplace(
4023         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
4024     ++OffloadingEntriesNum;
4025   }
4026 }
4027 
4028 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
4029     actOnDeviceGlobalVarEntriesInfo(
4030         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
4031   // Scan all target region entries and perform the provided action.
4032   for (const auto &E : OffloadEntriesDeviceGlobalVar)
4033     Action(E.getKey(), E.getValue());
4034 }
4035 
4036 void CGOpenMPRuntime::createOffloadEntry(
4037     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
4038     llvm::GlobalValue::LinkageTypes Linkage) {
4039   StringRef Name = Addr->getName();
4040   llvm::Module &M = CGM.getModule();
4041   llvm::LLVMContext &C = M.getContext();
4042 
4043   // Create constant string with the name.
4044   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
4045 
4046   std::string StringName = getName({"omp_offloading", "entry_name"});
4047   auto *Str = new llvm::GlobalVariable(
4048       M, StrPtrInit->getType(), /*isConstant=*/true,
4049       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
4050   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
4051 
4052   llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
4053                             llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
4054                             llvm::ConstantInt::get(CGM.SizeTy, Size),
4055                             llvm::ConstantInt::get(CGM.Int32Ty, Flags),
4056                             llvm::ConstantInt::get(CGM.Int32Ty, 0)};
4057   std::string EntryName = getName({"omp_offloading", "entry", ""});
4058   llvm::GlobalVariable *Entry = createGlobalStruct(
4059       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
4060       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
4061 
4062   // The entry has to be created in the section the linker expects it to be.
4063   Entry->setSection("omp_offloading_entries");
4064 }
4065 
4066 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
4067   // Emit the offloading entries and metadata so that the device codegen side
4068   // can easily figure out what to emit. The produced metadata looks like
4069   // this:
4070   //
4071   // !omp_offload.info = !{!1, ...}
4072   //
4073   // Right now we only generate metadata for function that contain target
4074   // regions.
4075 
4076   // If we are in simd mode or there are no entries, we don't need to do
4077   // anything.
4078   if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
4079     return;
4080 
4081   llvm::Module &M = CGM.getModule();
4082   llvm::LLVMContext &C = M.getContext();
4083   SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
4084                          SourceLocation, StringRef>,
4085               16>
4086       OrderedEntries(OffloadEntriesInfoManager.size());
4087   llvm::SmallVector<StringRef, 16> ParentFunctions(
4088       OffloadEntriesInfoManager.size());
4089 
4090   // Auxiliary methods to create metadata values and strings.
4091   auto &&GetMDInt = [this](unsigned V) {
4092     return llvm::ConstantAsMetadata::get(
4093         llvm::ConstantInt::get(CGM.Int32Ty, V));
4094   };
4095 
4096   auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
4097 
4098   // Create the offloading info metadata node.
4099   llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
4100 
4101   // Create function that emits metadata for each target region entry;
4102   auto &&TargetRegionMetadataEmitter =
4103       [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
4104        &GetMDString](
4105           unsigned DeviceID, unsigned FileID, StringRef ParentName,
4106           unsigned Line,
4107           const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
4108         // Generate metadata for target regions. Each entry of this metadata
4109         // contains:
4110         // - Entry 0 -> Kind of this type of metadata (0).
4111         // - Entry 1 -> Device ID of the file where the entry was identified.
4112         // - Entry 2 -> File ID of the file where the entry was identified.
4113         // - Entry 3 -> Mangled name of the function where the entry was
4114         // identified.
4115         // - Entry 4 -> Line in the file where the entry was identified.
4116         // - Entry 5 -> Order the entry was created.
4117         // The first element of the metadata node is the kind.
4118         llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
4119                                  GetMDInt(FileID),      GetMDString(ParentName),
4120                                  GetMDInt(Line),        GetMDInt(E.getOrder())};
4121 
4122         SourceLocation Loc;
4123         for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
4124                   E = CGM.getContext().getSourceManager().fileinfo_end();
4125              I != E; ++I) {
4126           if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
4127               I->getFirst()->getUniqueID().getFile() == FileID) {
4128             Loc = CGM.getContext().getSourceManager().translateFileLineCol(
4129                 I->getFirst(), Line, 1);
4130             break;
4131           }
4132         }
4133         // Save this entry in the right position of the ordered entries array.
4134         OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
4135         ParentFunctions[E.getOrder()] = ParentName;
4136 
4137         // Add metadata to the named metadata node.
4138         MD->addOperand(llvm::MDNode::get(C, Ops));
4139       };
4140 
4141   OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
4142       TargetRegionMetadataEmitter);
4143 
4144   // Create function that emits metadata for each device global variable entry;
4145   auto &&DeviceGlobalVarMetadataEmitter =
4146       [&C, &OrderedEntries, &GetMDInt, &GetMDString,
4147        MD](StringRef MangledName,
4148            const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
4149                &E) {
4150         // Generate metadata for global variables. Each entry of this metadata
4151         // contains:
4152         // - Entry 0 -> Kind of this type of metadata (1).
4153         // - Entry 1 -> Mangled name of the variable.
4154         // - Entry 2 -> Declare target kind.
4155         // - Entry 3 -> Order the entry was created.
4156         // The first element of the metadata node is the kind.
4157         llvm::Metadata *Ops[] = {
4158             GetMDInt(E.getKind()), GetMDString(MangledName),
4159             GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
4160 
4161         // Save this entry in the right position of the ordered entries array.
4162         OrderedEntries[E.getOrder()] =
4163             std::make_tuple(&E, SourceLocation(), MangledName);
4164 
4165         // Add metadata to the named metadata node.
4166         MD->addOperand(llvm::MDNode::get(C, Ops));
4167       };
4168 
4169   OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
4170       DeviceGlobalVarMetadataEmitter);
4171 
4172   for (const auto &E : OrderedEntries) {
4173     assert(std::get<0>(E) && "All ordered entries must exist!");
4174     if (const auto *CE =
4175             dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
4176                 std::get<0>(E))) {
4177       if (!CE->getID() || !CE->getAddress()) {
4178         // Do not blame the entry if the parent funtion is not emitted.
4179         StringRef FnName = ParentFunctions[CE->getOrder()];
4180         if (!CGM.GetGlobalValue(FnName))
4181           continue;
4182         unsigned DiagID = CGM.getDiags().getCustomDiagID(
4183             DiagnosticsEngine::Error,
4184             "Offloading entry for target region in %0 is incorrect: either the "
4185             "address or the ID is invalid.");
4186         CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
4187         continue;
4188       }
4189       createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
4190                          CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
4191     } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
4192                                              OffloadEntryInfoDeviceGlobalVar>(
4193                    std::get<0>(E))) {
4194       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
4195           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
4196               CE->getFlags());
4197       switch (Flags) {
4198       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
4199         if (CGM.getLangOpts().OpenMPIsDevice &&
4200             CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
4201           continue;
4202         if (!CE->getAddress()) {
4203           unsigned DiagID = CGM.getDiags().getCustomDiagID(
4204               DiagnosticsEngine::Error, "Offloading entry for declare target "
4205                                         "variable %0 is incorrect: the "
4206                                         "address is invalid.");
4207           CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
4208           continue;
4209         }
4210         // The vaiable has no definition - no need to add the entry.
4211         if (CE->getVarSize().isZero())
4212           continue;
4213         break;
4214       }
4215       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
4216         assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
4217                 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
4218                "Declaret target link address is set.");
4219         if (CGM.getLangOpts().OpenMPIsDevice)
4220           continue;
4221         if (!CE->getAddress()) {
4222           unsigned DiagID = CGM.getDiags().getCustomDiagID(
4223               DiagnosticsEngine::Error,
4224               "Offloading entry for declare target variable is incorrect: the "
4225               "address is invalid.");
4226           CGM.getDiags().Report(DiagID);
4227           continue;
4228         }
4229         break;
4230       }
4231       createOffloadEntry(CE->getAddress(), CE->getAddress(),
4232                          CE->getVarSize().getQuantity(), Flags,
4233                          CE->getLinkage());
4234     } else {
4235       llvm_unreachable("Unsupported entry kind.");
4236     }
4237   }
4238 }
4239 
4240 /// Loads all the offload entries information from the host IR
4241 /// metadata.
4242 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
4243   // If we are in target mode, load the metadata from the host IR. This code has
4244   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
4245 
4246   if (!CGM.getLangOpts().OpenMPIsDevice)
4247     return;
4248 
4249   if (CGM.getLangOpts().OMPHostIRFile.empty())
4250     return;
4251 
4252   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
4253   if (auto EC = Buf.getError()) {
4254     CGM.getDiags().Report(diag::err_cannot_open_file)
4255         << CGM.getLangOpts().OMPHostIRFile << EC.message();
4256     return;
4257   }
4258 
4259   llvm::LLVMContext C;
4260   auto ME = expectedToErrorOrAndEmitErrors(
4261       C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
4262 
4263   if (auto EC = ME.getError()) {
4264     unsigned DiagID = CGM.getDiags().getCustomDiagID(
4265         DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
4266     CGM.getDiags().Report(DiagID)
4267         << CGM.getLangOpts().OMPHostIRFile << EC.message();
4268     return;
4269   }
4270 
4271   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
4272   if (!MD)
4273     return;
4274 
4275   for (llvm::MDNode *MN : MD->operands()) {
4276     auto &&GetMDInt = [MN](unsigned Idx) {
4277       auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
4278       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
4279     };
4280 
4281     auto &&GetMDString = [MN](unsigned Idx) {
4282       auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
4283       return V->getString();
4284     };
4285 
4286     switch (GetMDInt(0)) {
4287     default:
4288       llvm_unreachable("Unexpected metadata!");
4289       break;
4290     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
4291         OffloadingEntryInfoTargetRegion:
4292       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
4293           /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
4294           /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
4295           /*Order=*/GetMDInt(5));
4296       break;
4297     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
4298         OffloadingEntryInfoDeviceGlobalVar:
4299       OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
4300           /*MangledName=*/GetMDString(1),
4301           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
4302               /*Flags=*/GetMDInt(2)),
4303           /*Order=*/GetMDInt(3));
4304       break;
4305     }
4306   }
4307 }
4308 
4309 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
4310   if (!KmpRoutineEntryPtrTy) {
4311     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
4312     ASTContext &C = CGM.getContext();
4313     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
4314     FunctionProtoType::ExtProtoInfo EPI;
4315     KmpRoutineEntryPtrQTy = C.getPointerType(
4316         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
4317     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
4318   }
4319 }
4320 
4321 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
4322   // Make sure the type of the entry is already created. This is the type we
4323   // have to create:
4324   // struct __tgt_offload_entry{
4325   //   void      *addr;       // Pointer to the offload entry info.
4326   //                          // (function or global)
4327   //   char      *name;       // Name of the function or global.
4328   //   size_t     size;       // Size of the entry info (0 if it a function).
4329   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
4330   //   int32_t    reserved;   // Reserved, to use by the runtime library.
4331   // };
4332   if (TgtOffloadEntryQTy.isNull()) {
4333     ASTContext &C = CGM.getContext();
4334     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
4335     RD->startDefinition();
4336     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4337     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
4338     addFieldToRecordDecl(C, RD, C.getSizeType());
4339     addFieldToRecordDecl(
4340         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4341     addFieldToRecordDecl(
4342         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4343     RD->completeDefinition();
4344     RD->addAttr(PackedAttr::CreateImplicit(C));
4345     TgtOffloadEntryQTy = C.getRecordType(RD);
4346   }
4347   return TgtOffloadEntryQTy;
4348 }
4349 
4350 namespace {
4351 struct PrivateHelpersTy {
4352   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
4353                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
4354       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
4355         PrivateElemInit(PrivateElemInit) {}
4356   const Expr *OriginalRef = nullptr;
4357   const VarDecl *Original = nullptr;
4358   const VarDecl *PrivateCopy = nullptr;
4359   const VarDecl *PrivateElemInit = nullptr;
4360 };
4361 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
4362 } // anonymous namespace
4363 
4364 static RecordDecl *
4365 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
4366   if (!Privates.empty()) {
4367     ASTContext &C = CGM.getContext();
4368     // Build struct .kmp_privates_t. {
4369     //         /*  private vars  */
4370     //       };
4371     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
4372     RD->startDefinition();
4373     for (const auto &Pair : Privates) {
4374       const VarDecl *VD = Pair.second.Original;
4375       QualType Type = VD->getType().getNonReferenceType();
4376       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
4377       if (VD->hasAttrs()) {
4378         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
4379              E(VD->getAttrs().end());
4380              I != E; ++I)
4381           FD->addAttr(*I);
4382       }
4383     }
4384     RD->completeDefinition();
4385     return RD;
4386   }
4387   return nullptr;
4388 }
4389 
4390 static RecordDecl *
4391 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
4392                          QualType KmpInt32Ty,
4393                          QualType KmpRoutineEntryPointerQTy) {
4394   ASTContext &C = CGM.getContext();
4395   // Build struct kmp_task_t {
4396   //         void *              shareds;
4397   //         kmp_routine_entry_t routine;
4398   //         kmp_int32           part_id;
4399   //         kmp_cmplrdata_t data1;
4400   //         kmp_cmplrdata_t data2;
4401   // For taskloops additional fields:
4402   //         kmp_uint64          lb;
4403   //         kmp_uint64          ub;
4404   //         kmp_int64           st;
4405   //         kmp_int32           liter;
4406   //         void *              reductions;
4407   //       };
4408   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
4409   UD->startDefinition();
4410   addFieldToRecordDecl(C, UD, KmpInt32Ty);
4411   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
4412   UD->completeDefinition();
4413   QualType KmpCmplrdataTy = C.getRecordType(UD);
4414   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
4415   RD->startDefinition();
4416   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4417   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
4418   addFieldToRecordDecl(C, RD, KmpInt32Ty);
4419   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4420   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4421   if (isOpenMPTaskLoopDirective(Kind)) {
4422     QualType KmpUInt64Ty =
4423         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
4424     QualType KmpInt64Ty =
4425         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
4426     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4427     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4428     addFieldToRecordDecl(C, RD, KmpInt64Ty);
4429     addFieldToRecordDecl(C, RD, KmpInt32Ty);
4430     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4431   }
4432   RD->completeDefinition();
4433   return RD;
4434 }
4435 
4436 static RecordDecl *
4437 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
4438                                      ArrayRef<PrivateDataTy> Privates) {
4439   ASTContext &C = CGM.getContext();
4440   // Build struct kmp_task_t_with_privates {
4441   //         kmp_task_t task_data;
4442   //         .kmp_privates_t. privates;
4443   //       };
4444   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
4445   RD->startDefinition();
4446   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
4447   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
4448     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
4449   RD->completeDefinition();
4450   return RD;
4451 }
4452 
4453 /// Emit a proxy function which accepts kmp_task_t as the second
4454 /// argument.
4455 /// \code
4456 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
4457 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
4458 ///   For taskloops:
4459 ///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
4460 ///   tt->reductions, tt->shareds);
4461 ///   return 0;
4462 /// }
4463 /// \endcode
4464 static llvm::Function *
4465 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
4466                       OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
4467                       QualType KmpTaskTWithPrivatesPtrQTy,
4468                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
4469                       QualType SharedsPtrTy, llvm::Function *TaskFunction,
4470                       llvm::Value *TaskPrivatesMap) {
4471   ASTContext &C = CGM.getContext();
4472   FunctionArgList Args;
4473   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
4474                             ImplicitParamDecl::Other);
4475   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4476                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
4477                                 ImplicitParamDecl::Other);
4478   Args.push_back(&GtidArg);
4479   Args.push_back(&TaskTypeArg);
4480   const auto &TaskEntryFnInfo =
4481       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
4482   llvm::FunctionType *TaskEntryTy =
4483       CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
4484   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
4485   auto *TaskEntry = llvm::Function::Create(
4486       TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4487   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
4488   TaskEntry->setDoesNotRecurse();
4489   CodeGenFunction CGF(CGM);
4490   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
4491                     Loc, Loc);
4492 
4493   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
4494   // tt,
4495   // For taskloops:
4496   // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
4497   // tt->task_data.shareds);
4498   llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
4499       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
4500   LValue TDBase = CGF.EmitLoadOfPointerLValue(
4501       CGF.GetAddrOfLocalVar(&TaskTypeArg),
4502       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4503   const auto *KmpTaskTWithPrivatesQTyRD =
4504       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
4505   LValue Base =
4506       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4507   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
4508   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4509   LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
4510   llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
4511 
4512   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
4513   LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
4514   llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4515       CGF.EmitLoadOfScalar(SharedsLVal, Loc),
4516       CGF.ConvertTypeForMem(SharedsPtrTy));
4517 
4518   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4519   llvm::Value *PrivatesParam;
4520   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
4521     LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
4522     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4523         PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
4524   } else {
4525     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4526   }
4527 
4528   llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
4529                                TaskPrivatesMap,
4530                                CGF.Builder
4531                                    .CreatePointerBitCastOrAddrSpaceCast(
4532                                        TDBase.getAddress(CGF), CGF.VoidPtrTy)
4533                                    .getPointer()};
4534   SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
4535                                           std::end(CommonArgs));
4536   if (isOpenMPTaskLoopDirective(Kind)) {
4537     auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
4538     LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
4539     llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
4540     auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
4541     LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
4542     llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
4543     auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
4544     LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
4545     llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
4546     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4547     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4548     llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
4549     auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
4550     LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
4551     llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
4552     CallArgs.push_back(LBParam);
4553     CallArgs.push_back(UBParam);
4554     CallArgs.push_back(StParam);
4555     CallArgs.push_back(LIParam);
4556     CallArgs.push_back(RParam);
4557   }
4558   CallArgs.push_back(SharedsParam);
4559 
4560   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
4561                                                   CallArgs);
4562   CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
4563                              CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
4564   CGF.FinishFunction();
4565   return TaskEntry;
4566 }
4567 
4568 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
4569                                             SourceLocation Loc,
4570                                             QualType KmpInt32Ty,
4571                                             QualType KmpTaskTWithPrivatesPtrQTy,
4572                                             QualType KmpTaskTWithPrivatesQTy) {
4573   ASTContext &C = CGM.getContext();
4574   FunctionArgList Args;
4575   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
4576                             ImplicitParamDecl::Other);
4577   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4578                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
4579                                 ImplicitParamDecl::Other);
4580   Args.push_back(&GtidArg);
4581   Args.push_back(&TaskTypeArg);
4582   const auto &DestructorFnInfo =
4583       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
4584   llvm::FunctionType *DestructorFnTy =
4585       CGM.getTypes().GetFunctionType(DestructorFnInfo);
4586   std::string Name =
4587       CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
4588   auto *DestructorFn =
4589       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
4590                              Name, &CGM.getModule());
4591   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
4592                                     DestructorFnInfo);
4593   DestructorFn->setDoesNotRecurse();
4594   CodeGenFunction CGF(CGM);
4595   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
4596                     Args, Loc, Loc);
4597 
4598   LValue Base = CGF.EmitLoadOfPointerLValue(
4599       CGF.GetAddrOfLocalVar(&TaskTypeArg),
4600       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4601   const auto *KmpTaskTWithPrivatesQTyRD =
4602       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
4603   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4604   Base = CGF.EmitLValueForField(Base, *FI);
4605   for (const auto *Field :
4606        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
4607     if (QualType::DestructionKind DtorKind =
4608             Field->getType().isDestructedType()) {
4609       LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
4610       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
4611     }
4612   }
4613   CGF.FinishFunction();
4614   return DestructorFn;
4615 }
4616 
4617 /// Emit a privates mapping function for correct handling of private and
4618 /// firstprivate variables.
4619 /// \code
4620 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
4621 /// **noalias priv1,...,  <tyn> **noalias privn) {
4622 ///   *priv1 = &.privates.priv1;
4623 ///   ...;
4624 ///   *privn = &.privates.privn;
4625 /// }
4626 /// \endcode
4627 static llvm::Value *
4628 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
4629                                ArrayRef<const Expr *> PrivateVars,
4630                                ArrayRef<const Expr *> FirstprivateVars,
4631                                ArrayRef<const Expr *> LastprivateVars,
4632                                QualType PrivatesQTy,
4633                                ArrayRef<PrivateDataTy> Privates) {
4634   ASTContext &C = CGM.getContext();
4635   FunctionArgList Args;
4636   ImplicitParamDecl TaskPrivatesArg(
4637       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4638       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
4639       ImplicitParamDecl::Other);
4640   Args.push_back(&TaskPrivatesArg);
4641   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
4642   unsigned Counter = 1;
4643   for (const Expr *E : PrivateVars) {
4644     Args.push_back(ImplicitParamDecl::Create(
4645         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4646         C.getPointerType(C.getPointerType(E->getType()))
4647             .withConst()
4648             .withRestrict(),
4649         ImplicitParamDecl::Other));
4650     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4651     PrivateVarsPos[VD] = Counter;
4652     ++Counter;
4653   }
4654   for (const Expr *E : FirstprivateVars) {
4655     Args.push_back(ImplicitParamDecl::Create(
4656         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4657         C.getPointerType(C.getPointerType(E->getType()))
4658             .withConst()
4659             .withRestrict(),
4660         ImplicitParamDecl::Other));
4661     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4662     PrivateVarsPos[VD] = Counter;
4663     ++Counter;
4664   }
4665   for (const Expr *E : LastprivateVars) {
4666     Args.push_back(ImplicitParamDecl::Create(
4667         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4668         C.getPointerType(C.getPointerType(E->getType()))
4669             .withConst()
4670             .withRestrict(),
4671         ImplicitParamDecl::Other));
4672     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4673     PrivateVarsPos[VD] = Counter;
4674     ++Counter;
4675   }
4676   const auto &TaskPrivatesMapFnInfo =
4677       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4678   llvm::FunctionType *TaskPrivatesMapTy =
4679       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
4680   std::string Name =
4681       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
4682   auto *TaskPrivatesMap = llvm::Function::Create(
4683       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
4684       &CGM.getModule());
4685   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
4686                                     TaskPrivatesMapFnInfo);
4687   if (CGM.getLangOpts().Optimize) {
4688     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
4689     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
4690     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
4691   }
4692   CodeGenFunction CGF(CGM);
4693   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
4694                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
4695 
4696   // *privi = &.privates.privi;
4697   LValue Base = CGF.EmitLoadOfPointerLValue(
4698       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
4699       TaskPrivatesArg.getType()->castAs<PointerType>());
4700   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
4701   Counter = 0;
4702   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
4703     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
4704     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
4705     LValue RefLVal =
4706         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
4707     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
4708         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
4709     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
4710     ++Counter;
4711   }
4712   CGF.FinishFunction();
4713   return TaskPrivatesMap;
4714 }
4715 
/// Emit initialization for private variables in task-based directives.
///
/// Walks \p Privates in lockstep with the fields of the privates record (the
/// second field of \p KmpTaskTWithPrivatesQTyRD) and emits the initializer of
/// each private copy into its field.
///
/// \param KmpTaskSharedsPtr Address of the task's shareds block. It is used as
/// the source of firstprivate originals when duplicating a non-target task
/// with firstprivates, or for target tasks when the address is valid.
/// \param TDBase LValue of the task-with-privates record being initialized.
/// \param SharedsTy Type of the shareds record; \p SharedsPtrTy is the pointer
/// type the shareds address is cast to.
/// \param ForDup True when emitting into the task duplication function; in
/// that mode only copies initialized by a non-trivial constructor call are
/// emitted.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The second field of the combined record holds the privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  // Pick the captured statement that belongs to the task-generating part of
  // the directive.
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  // Base of the shareds record; only formed when it is needed below.
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // From here on FI iterates over the fields of the privates record itself;
  // it is advanced once per entry of Privates.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // Outside of the dup function every initializer is emitted; inside it only
    // non-trivial constructor calls are.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // A non-null PrivateElemInit means the copy is initialized from the
      // original variable, so the original's address has to be located first.
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // In the dup function the original is read from the shareds record,
          // re-wrapped with the declared alignment of the original variable.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else {
          // Otherwise evaluate the reference to the original variable while an
          // inlined-region RAII object with an empty body is active.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false);
          SharedRefLValue =  CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array copy: map the helper variable Elem to the original's
          // address and emit the initializer into the private field.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // No element-init helper: the initializer does not need the original.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
4827 
4828 /// Check if duplication function is required for taskloops.
4829 static bool checkInitIsRequired(CodeGenFunction &CGF,
4830                                 ArrayRef<PrivateDataTy> Privates) {
4831   bool InitRequired = false;
4832   for (const PrivateDataTy &Pair : Privates) {
4833     const VarDecl *VD = Pair.second.PrivateCopy;
4834     const Expr *Init = VD->getAnyInitializer();
4835     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
4836                                     !CGF.isTrivialInitializer(Init));
4837     if (InitRequired)
4838       break;
4839   }
4840   return InitRequired;
4841 }
4842 
4843 
4844 /// Emit task_dup function (for initialization of
4845 /// private/firstprivate/lastprivate vars and last_iter flag)
4846 /// \code
4847 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
4848 /// lastpriv) {
4849 /// // setup lastprivate flag
4850 ///    task_dst->last = lastpriv;
4851 /// // could be constructor calls here...
4852 /// }
4853 /// \endcode
4854 static llvm::Value *
4855 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
4856                     const OMPExecutableDirective &D,
4857                     QualType KmpTaskTWithPrivatesPtrQTy,
4858                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4859                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
4860                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
4861                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
4862   ASTContext &C = CGM.getContext();
4863   FunctionArgList Args;
4864   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4865                            KmpTaskTWithPrivatesPtrQTy,
4866                            ImplicitParamDecl::Other);
4867   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4868                            KmpTaskTWithPrivatesPtrQTy,
4869                            ImplicitParamDecl::Other);
4870   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
4871                                 ImplicitParamDecl::Other);
4872   Args.push_back(&DstArg);
4873   Args.push_back(&SrcArg);
4874   Args.push_back(&LastprivArg);
4875   const auto &TaskDupFnInfo =
4876       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4877   llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
4878   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
4879   auto *TaskDup = llvm::Function::Create(
4880       TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4881   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
4882   TaskDup->setDoesNotRecurse();
4883   CodeGenFunction CGF(CGM);
4884   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
4885                     Loc);
4886 
4887   LValue TDBase = CGF.EmitLoadOfPointerLValue(
4888       CGF.GetAddrOfLocalVar(&DstArg),
4889       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4890   // task_dst->liter = lastpriv;
4891   if (WithLastIter) {
4892     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4893     LValue Base = CGF.EmitLValueForField(
4894         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4895     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4896     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
4897         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
4898     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
4899   }
4900 
4901   // Emit initial values for private copies (if any).
4902   assert(!Privates.empty());
4903   Address KmpTaskSharedsPtr = Address::invalid();
4904   if (!Data.FirstprivateVars.empty()) {
4905     LValue TDBase = CGF.EmitLoadOfPointerLValue(
4906         CGF.GetAddrOfLocalVar(&SrcArg),
4907         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4908     LValue Base = CGF.EmitLValueForField(
4909         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4910     KmpTaskSharedsPtr = Address(
4911         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
4912                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
4913                                                   KmpTaskTShareds)),
4914                              Loc),
4915         CGF.getNaturalTypeAlignment(SharedsTy));
4916   }
4917   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
4918                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
4919   CGF.FinishFunction();
4920   return TaskDup;
4921 }
4922 
4923 /// Checks if destructor function is required to be generated.
4924 /// \return true if cleanups are required, false otherwise.
4925 static bool
4926 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
4927   bool NeedsCleanup = false;
4928   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4929   const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
4930   for (const FieldDecl *FD : PrivateRD->fields()) {
4931     NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
4932     if (NeedsCleanup)
4933       break;
4934   }
4935   return NeedsCleanup;
4936 }
4937 
/// Emit the allocation and initialization of a task descriptor for \p D.
///
/// Collects the private/firstprivate/lastprivate copies, builds the
/// kmp_task_t-with-privates record type, emits the privates mapping function
/// and the proxy task entry, calls the runtime task allocator, copies the
/// shareds, initializes the private copies and finally fills in the
/// destructors and priority slots of the task descriptor.
///
/// \param TaskFunction Outlined task function; the type of its fourth
/// argument is used as the type of the privates mapping function pointer.
/// \param SharedsTy Type of the record with the shared variables.
/// \param Shareds Address of the shareds record to copy into the new task.
/// \return The allocated task, its entry function, the optional task
/// duplication function and the LValue/record information of the descriptor.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally carry the helper variable used by their
  // initializer to refer to the original value.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Stable sort in order of decreasing alignment; entries with equal alignment
  // keep their relative order.
  llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
    return L.first > R.first;
  });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  // Taskloop directives and the other task-generating directives use
  // separately cached record types.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  // The privates mapping function is passed to the proxy task entry; a null
  // pointer of the expected type is used when there are no privates.
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  // Flags known at compile time are accumulated in Flags; the final flag may
  // depend on a runtime condition and is merged in below.
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // If Data.Final carries a value, select the final flag at run time;
  // otherwise use its constant setting.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  // Directives with a nowait clause are allocated through the target task
  // allocation entry point, which takes the device id as an extra argument.
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_target_task_alloc), AllocArgs);
  } else {
    NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    // Note: this local Loc (an llvm::Value) shadows the SourceLocation
    // parameter for the rest of this block.
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // View the allocated task as the task-with-privates record; TDBase is its
  // first field, the kmp_task_t part.
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops get a duplication function when there are lastprivates or when
    // some private copy needs a non-trivial constructor call.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  // The union declaration obtained from the Data1 field is reused to address
  // the priority member inside the Data2 field.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
5179 
namespace {
/// Dependence kinds in the encoding passed to the runtime library.
enum RTLDependenceKindTy {
  DepIn = 0x1,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4,
};
/// Positions of the fields inside the kmp_depend_info record.
enum RTLDependInfoFieldsTy {
  BaseAddr = 0,
  Len = 1,
  Flags = 2,
};
} // namespace
5190 
5191 /// Translates internal dependency kind into the runtime kind.
5192 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
5193   RTLDependenceKindTy DepKind;
5194   switch (K) {
5195   case OMPC_DEPEND_in:
5196     DepKind = DepIn;
5197     break;
5198   // Out and InOut dependencies must use the same code.
5199   case OMPC_DEPEND_out:
5200   case OMPC_DEPEND_inout:
5201     DepKind = DepInOut;
5202     break;
5203   case OMPC_DEPEND_mutexinoutset:
5204     DepKind = DepMutexInOutSet;
5205     break;
5206   case OMPC_DEPEND_source:
5207   case OMPC_DEPEND_sink:
5208   case OMPC_DEPEND_depobj:
5209   case OMPC_DEPEND_unknown:
5210     llvm_unreachable("Unknown task dependence type");
5211   }
5212   return DepKind;
5213 }
5214 
5215 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
5216 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
5217                            QualType &FlagsTy) {
5218   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
5219   if (KmpDependInfoTy.isNull()) {
5220     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
5221     KmpDependInfoRD->startDefinition();
5222     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
5223     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
5224     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
5225     KmpDependInfoRD->completeDefinition();
5226     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
5227   }
5228 }
5229 
5230 std::pair<llvm::Value *, LValue>
5231 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
5232                                    SourceLocation Loc) {
5233   ASTContext &C = CGM.getContext();
5234   QualType FlagsTy;
5235   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5236   RecordDecl *KmpDependInfoRD =
5237       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5238   LValue Base = CGF.EmitLoadOfPointerLValue(
5239       DepobjLVal.getAddress(CGF),
5240       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5241   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5242   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5243           Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
5244   Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
5245                             Base.getTBAAInfo());
5246   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5247       Addr.getPointer(),
5248       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5249   LValue NumDepsBase = CGF.MakeAddrLValue(
5250       Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
5251       Base.getBaseInfo(), Base.getTBAAInfo());
5252   // NumDeps = deps[i].base_addr;
5253   LValue BaseAddrLVal = CGF.EmitLValueForField(
5254       NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
5255   llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
5256   return std::make_pair(NumDeps, Base);
5257 }
5258 
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// The constructor opens one loop per iterator of the expression (outermost
/// first) and leaves the insertion point inside the innermost loop body; the
/// destructor closes the loops in reverse order. When constructed with a null
/// expression, both constructor and destructor emit nothing.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  /// Iterator expression being expanded; may be null.
  const OMPIteratorExpr *E = nullptr;
  /// Per-iterator jump destinations of the loop condition blocks.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  /// Per-iterator jump destinations of the loop exit blocks.
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  /// Privatize the iterator and counter variables and emit the loop headers.
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    // Evaluate all upper bounds up front and register temporaries for each
    // iterator variable and its helper counter.
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    // Emit the header of each loop, nesting later iterators inside earlier
    // ones.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Signedness of the comparison follows the type of the counter.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  /// Emit the counter increments, back edges and exit blocks, innermost loop
  /// first.
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      // Only the exit block of the outermost loop (I == 1) is emitted as
      // finished.
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
5337 
5338 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
5339                            llvm::PointerUnion<unsigned *, LValue *> Pos,
5340                            const OMPTaskDataTy::DependData &Data,
5341                            Address DependenciesArray) {
5342   CodeGenModule &CGM = CGF.CGM;
5343   ASTContext &C = CGM.getContext();
5344   QualType FlagsTy;
5345   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5346   RecordDecl *KmpDependInfoRD =
5347       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5348   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5349 
5350   OMPIteratorGeneratorScope IteratorScope(
5351       CGF, cast_or_null<OMPIteratorExpr>(
5352                Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
5353                                  : nullptr));
5354   for (const Expr *E : Data.DepExprs) {
5355     const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
5356     llvm::Value *Addr;
5357     if (OASE) {
5358       const Expr *Base = OASE->getBase();
5359       Addr = CGF.EmitScalarExpr(Base);
5360     } else {
5361       Addr = CGF.EmitLValue(E).getPointer(CGF);
5362     }
5363     llvm::Value *Size;
5364     QualType Ty = E->getType();
5365     if (OASE) {
5366       Size = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
5367       for (const Expr *SE : OASE->getDimensions()) {
5368         llvm::Value *Sz = CGF.EmitScalarExpr(SE);
5369         Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
5370                                       CGF.getContext().getSizeType(),
5371                                       SE->getExprLoc());
5372         Size = CGF.Builder.CreateNUWMul(Size, Sz);
5373       }
5374     } else if (const auto *ASE =
5375                    dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
5376       LValue UpAddrLVal =
5377           CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
5378       llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
5379           UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
5380       llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGM.SizeTy);
5381       llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
5382       Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
5383     } else {
5384       Size = CGF.getTypeSize(Ty);
5385     }
5386     LValue Base;
5387     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
5388       Base = CGF.MakeAddrLValue(
5389           CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
5390     } else {
5391       LValue &PosLVal = *Pos.get<LValue *>();
5392       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
5393       Base = CGF.MakeAddrLValue(
5394           Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
5395                   DependenciesArray.getAlignment()),
5396           KmpDependInfoTy);
5397     }
5398     // deps[i].base_addr = &<Dependencies[i].second>;
5399     LValue BaseAddrLVal = CGF.EmitLValueForField(
5400         Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
5401     CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
5402                           BaseAddrLVal);
5403     // deps[i].len = sizeof(<Dependencies[i].second>);
5404     LValue LenLVal = CGF.EmitLValueForField(
5405         Base, *std::next(KmpDependInfoRD->field_begin(), Len));
5406     CGF.EmitStoreOfScalar(Size, LenLVal);
5407     // deps[i].flags = <Dependencies[i].first>;
5408     RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
5409     LValue FlagsLVal = CGF.EmitLValueForField(
5410         Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5411     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5412                           FlagsLVal);
5413     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
5414       ++(*P);
5415     } else {
5416       LValue &PosLVal = *Pos.get<LValue *>();
5417       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
5418       Idx = CGF.Builder.CreateNUWAdd(Idx,
5419                                      llvm::ConstantInt::get(Idx->getType(), 1));
5420       CGF.EmitStoreOfScalar(Idx, PosLVal);
5421     }
5422   }
5423 }
5424 
5425 static SmallVector<llvm::Value *, 4>
5426 emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
5427                         const OMPTaskDataTy::DependData &Data) {
5428   assert(Data.DepKind == OMPC_DEPEND_depobj &&
5429          "Expected depobj dependecy kind.");
5430   SmallVector<llvm::Value *, 4> Sizes;
5431   SmallVector<LValue, 4> SizeLVals;
5432   ASTContext &C = CGF.getContext();
5433   QualType FlagsTy;
5434   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5435   RecordDecl *KmpDependInfoRD =
5436       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5437   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5438   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
5439   {
5440     OMPIteratorGeneratorScope IteratorScope(
5441         CGF, cast_or_null<OMPIteratorExpr>(
5442                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
5443                                    : nullptr));
5444     for (const Expr *E : Data.DepExprs) {
5445       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
5446       LValue Base = CGF.EmitLoadOfPointerLValue(
5447           DepobjLVal.getAddress(CGF),
5448           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5449       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5450           Base.getAddress(CGF), KmpDependInfoPtrT);
5451       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
5452                                 Base.getTBAAInfo());
5453       llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5454           Addr.getPointer(),
5455           llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5456       LValue NumDepsBase = CGF.MakeAddrLValue(
5457           Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
5458           Base.getBaseInfo(), Base.getTBAAInfo());
5459       // NumDeps = deps[i].base_addr;
5460       LValue BaseAddrLVal = CGF.EmitLValueForField(
5461           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
5462       llvm::Value *NumDeps =
5463           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
5464       LValue NumLVal = CGF.MakeAddrLValue(
5465           CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
5466           C.getUIntPtrType());
5467       CGF.InitTempAlloca(NumLVal.getAddress(CGF),
5468                          llvm::ConstantInt::get(CGF.IntPtrTy, 0));
5469       llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
5470       llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
5471       CGF.EmitStoreOfScalar(Add, NumLVal);
5472       SizeLVals.push_back(NumLVal);
5473     }
5474   }
5475   for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
5476     llvm::Value *Size =
5477         CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
5478     Sizes.push_back(Size);
5479   }
5480   return Sizes;
5481 }
5482 
5483 static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
5484                                LValue PosLVal,
5485                                const OMPTaskDataTy::DependData &Data,
5486                                Address DependenciesArray) {
5487   assert(Data.DepKind == OMPC_DEPEND_depobj &&
5488          "Expected depobj dependecy kind.");
5489   ASTContext &C = CGF.getContext();
5490   QualType FlagsTy;
5491   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5492   RecordDecl *KmpDependInfoRD =
5493       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5494   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5495   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
5496   llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
5497   {
5498     OMPIteratorGeneratorScope IteratorScope(
5499         CGF, cast_or_null<OMPIteratorExpr>(
5500                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
5501                                    : nullptr));
5502     for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
5503       const Expr *E = Data.DepExprs[I];
5504       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
5505       LValue Base = CGF.EmitLoadOfPointerLValue(
5506           DepobjLVal.getAddress(CGF),
5507           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5508       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5509           Base.getAddress(CGF), KmpDependInfoPtrT);
5510       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
5511                                 Base.getTBAAInfo());
5512 
5513       // Get number of elements in a single depobj.
5514       llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5515           Addr.getPointer(),
5516           llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5517       LValue NumDepsBase = CGF.MakeAddrLValue(
5518           Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
5519           Base.getBaseInfo(), Base.getTBAAInfo());
5520       // NumDeps = deps[i].base_addr;
5521       LValue BaseAddrLVal = CGF.EmitLValueForField(
5522           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
5523       llvm::Value *NumDeps =
5524           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
5525 
5526       // memcopy dependency data.
5527       llvm::Value *Size = CGF.Builder.CreateNUWMul(
5528           ElSize,
5529           CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
5530       llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
5531       Address DepAddr =
5532           Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
5533                   DependenciesArray.getAlignment());
5534       CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
5535 
5536       // Increase pos.
5537       // pos += size;
5538       llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
5539       CGF.EmitStoreOfScalar(Add, PosLVal);
5540     }
5541   }
5542 }
5543 
5544 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
5545     CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
5546     SourceLocation Loc) {
5547   if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
5548         return D.DepExprs.empty();
5549       }))
5550     return std::make_pair(nullptr, Address::invalid());
5551   // Process list of dependencies.
5552   ASTContext &C = CGM.getContext();
5553   Address DependenciesArray = Address::invalid();
5554   llvm::Value *NumOfElements = nullptr;
5555   unsigned NumDependencies = std::accumulate(
5556       Dependencies.begin(), Dependencies.end(), 0,
5557       [](unsigned V, const OMPTaskDataTy::DependData &D) {
5558         return D.DepKind == OMPC_DEPEND_depobj
5559                    ? V
5560                    : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
5561       });
5562   QualType FlagsTy;
5563   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5564   bool HasDepobjDeps = false;
5565   bool HasRegularWithIterators = false;
5566   llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
5567   llvm::Value *NumOfRegularWithIterators =
5568       llvm::ConstantInt::get(CGF.IntPtrTy, 1);
5569   // Calculate number of depobj dependecies and regular deps with the iterators.
5570   for (const OMPTaskDataTy::DependData &D : Dependencies) {
5571     if (D.DepKind == OMPC_DEPEND_depobj) {
5572       SmallVector<llvm::Value *, 4> Sizes =
5573           emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
5574       for (llvm::Value *Size : Sizes) {
5575         NumOfDepobjElements =
5576             CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
5577       }
5578       HasDepobjDeps = true;
5579       continue;
5580     }
5581     // Include number of iterations, if any.
5582     if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
5583       for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
5584         llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
5585         Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
5586         NumOfRegularWithIterators =
5587             CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
5588       }
5589       HasRegularWithIterators = true;
5590       continue;
5591     }
5592   }
5593 
5594   QualType KmpDependInfoArrayTy;
5595   if (HasDepobjDeps || HasRegularWithIterators) {
5596     NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
5597                                            /*isSigned=*/false);
5598     if (HasDepobjDeps) {
5599       NumOfElements =
5600           CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
5601     }
5602     if (HasRegularWithIterators) {
5603       NumOfElements =
5604           CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
5605     }
5606     OpaqueValueExpr OVE(Loc,
5607                         C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
5608                         VK_RValue);
5609     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
5610                                                   RValue::get(NumOfElements));
5611     KmpDependInfoArrayTy =
5612         C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
5613                                /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
5614     // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
5615     // Properly emit variable-sized array.
5616     auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
5617                                          ImplicitParamDecl::Other);
5618     CGF.EmitVarDecl(*PD);
5619     DependenciesArray = CGF.GetAddrOfLocalVar(PD);
5620     NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
5621                                               /*isSigned=*/false);
5622   } else {
5623     KmpDependInfoArrayTy = C.getConstantArrayType(
5624         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
5625         ArrayType::Normal, /*IndexTypeQuals=*/0);
5626     DependenciesArray =
5627         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
5628     DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
5629     NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
5630                                            /*isSigned=*/false);
5631   }
5632   unsigned Pos = 0;
5633   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
5634     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
5635         Dependencies[I].IteratorExpr)
5636       continue;
5637     emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
5638                    DependenciesArray);
5639   }
5640   // Copy regular dependecies with iterators.
5641   LValue PosLVal = CGF.MakeAddrLValue(
5642       CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
5643   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
5644   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
5645     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
5646         !Dependencies[I].IteratorExpr)
5647       continue;
5648     emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
5649                    DependenciesArray);
5650   }
5651   // Copy final depobj arrays without iterators.
5652   if (HasDepobjDeps) {
5653     for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
5654       if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
5655         continue;
5656       emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
5657                          DependenciesArray);
5658     }
5659   }
5660   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5661       DependenciesArray, CGF.VoidPtrTy);
5662   return std::make_pair(NumOfElements, DependenciesArray);
5663 }
5664 
5665 Address CGOpenMPRuntime::emitDepobjDependClause(
5666     CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
5667     SourceLocation Loc) {
5668   if (Dependencies.DepExprs.empty())
5669     return Address::invalid();
5670   // Process list of dependencies.
5671   ASTContext &C = CGM.getContext();
5672   Address DependenciesArray = Address::invalid();
5673   unsigned NumDependencies = Dependencies.DepExprs.size();
5674   QualType FlagsTy;
5675   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5676   RecordDecl *KmpDependInfoRD =
5677       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5678 
5679   llvm::Value *Size;
5680   // Define type kmp_depend_info[<Dependencies.size()>];
5681   // For depobj reserve one extra element to store the number of elements.
5682   // It is required to handle depobj(x) update(in) construct.
5683   // kmp_depend_info[<Dependencies.size()>] deps;
5684   llvm::Value *NumDepsVal;
5685   CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
5686   if (const auto *IE =
5687           cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
5688     NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
5689     for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
5690       llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
5691       Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
5692       NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
5693     }
5694     Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
5695                                     NumDepsVal);
5696     CharUnits SizeInBytes =
5697         C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
5698     llvm::Value *RecSize = CGM.getSize(SizeInBytes);
5699     Size = CGF.Builder.CreateNUWMul(Size, RecSize);
5700     NumDepsVal =
5701         CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
5702   } else {
5703     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
5704         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
5705         nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
5706     CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
5707     Size = CGM.getSize(Sz.alignTo(Align));
5708     NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
5709   }
5710   // Need to allocate on the dynamic memory.
5711   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5712   // Use default allocator.
5713   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5714   llvm::Value *Args[] = {ThreadID, Size, Allocator};
5715 
5716   llvm::Value *Addr = CGF.EmitRuntimeCall(
5717       createRuntimeFunction(OMPRTL__kmpc_alloc), Args, ".dep.arr.addr");
5718   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5719       Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
5720   DependenciesArray = Address(Addr, Align);
5721   // Write number of elements in the first element of array for depobj.
5722   LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
5723   // deps[i].base_addr = NumDependencies;
5724   LValue BaseAddrLVal = CGF.EmitLValueForField(
5725       Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
5726   CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
5727   llvm::PointerUnion<unsigned *, LValue *> Pos;
5728   unsigned Idx = 1;
5729   LValue PosLVal;
5730   if (Dependencies.IteratorExpr) {
5731     PosLVal = CGF.MakeAddrLValue(
5732         CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
5733         C.getSizeType());
5734     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
5735                           /*IsInit=*/true);
5736     Pos = &PosLVal;
5737   } else {
5738     Pos = &Idx;
5739   }
5740   emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
5741   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5742       CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
5743   return DependenciesArray;
5744 }
5745 
5746 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
5747                                         SourceLocation Loc) {
5748   ASTContext &C = CGM.getContext();
5749   QualType FlagsTy;
5750   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5751   LValue Base = CGF.EmitLoadOfPointerLValue(
5752       DepobjLVal.getAddress(CGF),
5753       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5754   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5755   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5756       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
5757   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5758       Addr.getPointer(),
5759       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5760   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
5761                                                                CGF.VoidPtrTy);
5762   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5763   // Use default allocator.
5764   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5765   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5766 
5767   // _kmpc_free(gtid, addr, nullptr);
5768   (void)CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_free), Args);
5769 }
5770 
5771 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
5772                                        OpenMPDependClauseKind NewDepKind,
5773                                        SourceLocation Loc) {
5774   ASTContext &C = CGM.getContext();
5775   QualType FlagsTy;
5776   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5777   RecordDecl *KmpDependInfoRD =
5778       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5779   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5780   llvm::Value *NumDeps;
5781   LValue Base;
5782   std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
5783 
5784   Address Begin = Base.getAddress(CGF);
5785   // Cast from pointer to array type to pointer to single element.
5786   llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
5787   // The basic structure here is a while-do loop.
5788   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
5789   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
5790   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5791   CGF.EmitBlock(BodyBB);
5792   llvm::PHINode *ElementPHI =
5793       CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
5794   ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
5795   Begin = Address(ElementPHI, Begin.getAlignment());
5796   Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
5797                             Base.getTBAAInfo());
5798   // deps[i].flags = NewDepKind;
5799   RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
5800   LValue FlagsLVal = CGF.EmitLValueForField(
5801       Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5802   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5803                         FlagsLVal);
5804 
5805   // Shift the address forward by one element.
5806   Address ElementNext =
5807       CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
5808   ElementPHI->addIncoming(ElementNext.getPointer(),
5809                           CGF.Builder.GetInsertBlock());
5810   llvm::Value *IsEmpty =
5811       CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
5812   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5813   // Done.
5814   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5815 }
5816 
/// Emits the runtime calls that launch the task created for directive \p D.
///
/// The task is first set up through emitTaskInit() and its dependence array
/// is built with emitDependClause(). Two code paths are then prepared:
///  - "then": enqueue the task with __kmpc_omp_task, or with
///    __kmpc_omp_task_with_deps if dependences are present;
///  - "else": wait for the dependences (if any) with __kmpc_omp_wait_deps and
///    call the task entry directly, bracketed by __kmpc_omp_task_begin_if0 and
///    __kmpc_omp_task_complete_if0.
/// When \p IfCond is given both paths are passed to emitIfClause(); otherwise
/// only the "then" path is emitted. Nothing is emitted if \p CGF has no insert
/// point.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  // Filled only when there are dependences; it is read under the same
  // condition inside ThenCodeGen.
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    // No "noalias" dependences are passed: zero count and a null list.
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // "Then" path: hand the task over to the runtime. The argument arrays are
  // captured by reference; they are locals of this function.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // For untied tasks store 0 into the part id field before enqueueing.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  // Arguments for __kmpc_omp_wait_deps; like DepTaskArgs, filled and read only
  // when there are dependences.
  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // "Else" path: wait for the dependences, then call the task entry directly
  // between the begin_if0/complete_if0 runtime calls.
  auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
                        &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    // The action carries the begin_if0 and complete_if0 runtime functions,
    // both taking TaskArgs.
    CommonActionTy Action(
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    // Without an if clause only the "then" path is emitted.
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
5927 
5928 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
5929                                        const OMPLoopDirective &D,
5930                                        llvm::Function *TaskFunction,
5931                                        QualType SharedsTy, Address Shareds,
5932                                        const Expr *IfCond,
5933                                        const OMPTaskDataTy &Data) {
5934   if (!CGF.HaveInsertPoint())
5935     return;
5936   TaskResultTy Result =
5937       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5938   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5939   // libcall.
5940   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
5941   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
5942   // sched, kmp_uint64 grainsize, void *task_dup);
5943   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5944   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5945   llvm::Value *IfVal;
5946   if (IfCond) {
5947     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
5948                                       /*isSigned=*/true);
5949   } else {
5950     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
5951   }
5952 
5953   LValue LBLVal = CGF.EmitLValueForField(
5954       Result.TDBase,
5955       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
5956   const auto *LBVar =
5957       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
5958   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
5959                        LBLVal.getQuals(),
5960                        /*IsInitializer=*/true);
5961   LValue UBLVal = CGF.EmitLValueForField(
5962       Result.TDBase,
5963       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
5964   const auto *UBVar =
5965       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
5966   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
5967                        UBLVal.getQuals(),
5968                        /*IsInitializer=*/true);
5969   LValue StLVal = CGF.EmitLValueForField(
5970       Result.TDBase,
5971       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
5972   const auto *StVar =
5973       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
5974   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
5975                        StLVal.getQuals(),
5976                        /*IsInitializer=*/true);
5977   // Store reductions address.
5978   LValue RedLVal = CGF.EmitLValueForField(
5979       Result.TDBase,
5980       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5981   if (Data.Reductions) {
5982     CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5983   } else {
5984     CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
5985                                CGF.getContext().VoidPtrTy);
5986   }
5987   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
5988   llvm::Value *TaskArgs[] = {
5989       UpLoc,
5990       ThreadID,
5991       Result.NewTask,
5992       IfVal,
5993       LBLVal.getPointer(CGF),
5994       UBLVal.getPointer(CGF),
5995       CGF.EmitLoadOfScalar(StLVal, Loc),
5996       llvm::ConstantInt::getSigned(
5997           CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
5998       llvm::ConstantInt::getSigned(
5999           CGF.IntTy, Data.Schedule.getPointer()
6000                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
6001                          : NoSchedule),
6002       Data.Schedule.getPointer()
6003           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
6004                                       /*isSigned=*/false)
6005           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
6006       Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6007                              Result.TaskDupFn, CGF.VoidPtrTy)
6008                        : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
6009   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
6010 }
6011 
6012 /// Emit reduction operation for each element of array (required for
6013 /// array sections) LHS op = RHS.
6014 /// \param Type Type of array.
6015 /// \param LHSVar Variable on the left side of the reduction operation
6016 /// (references element of array in original variable).
6017 /// \param RHSVar Variable on the right side of the reduction operation
6018 /// (references element of array in original variable).
6019 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
6020 /// RHSVar.
6021 static void EmitOMPAggregateReduction(
6022     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
6023     const VarDecl *RHSVar,
6024     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
6025                                   const Expr *, const Expr *)> &RedOpGen,
6026     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
6027     const Expr *UpExpr = nullptr) {
6028   // Perform element-by-element initialization.
6029   QualType ElementTy;
6030   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
6031   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
6032 
6033   // Drill down to the base element type on both arrays.
6034   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
6035   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
6036 
6037   llvm::Value *RHSBegin = RHSAddr.getPointer();
6038   llvm::Value *LHSBegin = LHSAddr.getPointer();
6039   // Cast from pointer to array type to pointer to single element.
6040   llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
6041   // The basic structure here is a while-do loop.
6042   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
6043   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
6044   llvm::Value *IsEmpty =
6045       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
6046   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
6047 
6048   // Enter the loop body, making that address the current address.
6049   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
6050   CGF.EmitBlock(BodyBB);
6051 
6052   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
6053 
6054   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
6055       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
6056   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
6057   Address RHSElementCurrent =
6058       Address(RHSElementPHI,
6059               RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
6060 
6061   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
6062       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
6063   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
6064   Address LHSElementCurrent =
6065       Address(LHSElementPHI,
6066               LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
6067 
6068   // Emit copy.
6069   CodeGenFunction::OMPPrivateScope Scope(CGF);
6070   Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
6071   Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
6072   Scope.Privatize();
6073   RedOpGen(CGF, XExpr, EExpr, UpExpr);
6074   Scope.ForceCleanup();
6075 
6076   // Shift the address forward by one element.
6077   llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
6078       LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
6079   llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
6080       RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
6081   // Check whether we've reached the end.
6082   llvm::Value *Done =
6083       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
6084   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
6085   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
6086   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
6087 
6088   // Done.
6089   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
6090 }
6091 
6092 /// Emit reduction combiner. If the combiner is a simple expression emit it as
6093 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
6094 /// UDR combiner function.
6095 static void emitReductionCombiner(CodeGenFunction &CGF,
6096                                   const Expr *ReductionOp) {
6097   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
6098     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
6099       if (const auto *DRE =
6100               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
6101         if (const auto *DRD =
6102                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
6103           std::pair<llvm::Function *, llvm::Function *> Reduction =
6104               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
6105           RValue Func = RValue::get(Reduction.first);
6106           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
6107           CGF.EmitIgnoredExpr(ReductionOp);
6108           return;
6109         }
6110   CGF.EmitIgnoredExpr(ReductionOp);
6111 }
6112 
6113 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
6114     SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
6115     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
6116     ArrayRef<const Expr *> ReductionOps) {
6117   ASTContext &C = CGM.getContext();
6118 
6119   // void reduction_func(void *LHSArg, void *RHSArg);
6120   FunctionArgList Args;
6121   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6122                            ImplicitParamDecl::Other);
6123   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6124                            ImplicitParamDecl::Other);
6125   Args.push_back(&LHSArg);
6126   Args.push_back(&RHSArg);
6127   const auto &CGFI =
6128       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6129   std::string Name = getName({"omp", "reduction", "reduction_func"});
6130   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
6131                                     llvm::GlobalValue::InternalLinkage, Name,
6132                                     &CGM.getModule());
6133   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
6134   Fn->setDoesNotRecurse();
6135   CodeGenFunction CGF(CGM);
6136   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
6137 
6138   // Dst = (void*[n])(LHSArg);
6139   // Src = (void*[n])(RHSArg);
6140   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6141       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
6142       ArgsType), CGF.getPointerAlign());
6143   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6144       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
6145       ArgsType), CGF.getPointerAlign());
6146 
6147   //  ...
6148   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
6149   //  ...
6150   CodeGenFunction::OMPPrivateScope Scope(CGF);
6151   auto IPriv = Privates.begin();
6152   unsigned Idx = 0;
6153   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
6154     const auto *RHSVar =
6155         cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
6156     Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
6157       return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
6158     });
6159     const auto *LHSVar =
6160         cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
6161     Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
6162       return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
6163     });
6164     QualType PrivTy = (*IPriv)->getType();
6165     if (PrivTy->isVariablyModifiedType()) {
6166       // Get array size and emit VLA type.
6167       ++Idx;
6168       Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
6169       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
6170       const VariableArrayType *VLA =
6171           CGF.getContext().getAsVariableArrayType(PrivTy);
6172       const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
6173       CodeGenFunction::OpaqueValueMapping OpaqueMap(
6174           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
6175       CGF.EmitVariablyModifiedType(PrivTy);
6176     }
6177   }
6178   Scope.Privatize();
6179   IPriv = Privates.begin();
6180   auto ILHS = LHSExprs.begin();
6181   auto IRHS = RHSExprs.begin();
6182   for (const Expr *E : ReductionOps) {
6183     if ((*IPriv)->getType()->isArrayType()) {
6184       // Emit reduction for array section.
6185       const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
6186       const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
6187       EmitOMPAggregateReduction(
6188           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
6189           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
6190             emitReductionCombiner(CGF, E);
6191           });
6192     } else {
6193       // Emit reduction for array subscript or single variable.
6194       emitReductionCombiner(CGF, E);
6195     }
6196     ++IPriv;
6197     ++ILHS;
6198     ++IRHS;
6199   }
6200   Scope.ForceCleanup();
6201   CGF.FinishFunction();
6202   return Fn;
6203 }
6204 
6205 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
6206                                                   const Expr *ReductionOp,
6207                                                   const Expr *PrivateRef,
6208                                                   const DeclRefExpr *LHS,
6209                                                   const DeclRefExpr *RHS) {
6210   if (PrivateRef->getType()->isArrayType()) {
6211     // Emit reduction for array section.
6212     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
6213     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
6214     EmitOMPAggregateReduction(
6215         CGF, PrivateRef->getType(), LHSVar, RHSVar,
6216         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
6217           emitReductionCombiner(CGF, ReductionOp);
6218         });
6219   } else {
6220     // Emit reduction for array subscript or single variable.
6221     emitReductionCombiner(CGF, ReductionOp);
6222   }
6223 }
6224 
6225 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
6226                                     ArrayRef<const Expr *> Privates,
6227                                     ArrayRef<const Expr *> LHSExprs,
6228                                     ArrayRef<const Expr *> RHSExprs,
6229                                     ArrayRef<const Expr *> ReductionOps,
6230                                     ReductionOptionsTy Options) {
6231   if (!CGF.HaveInsertPoint())
6232     return;
6233 
6234   bool WithNowait = Options.WithNowait;
6235   bool SimpleReduction = Options.SimpleReduction;
6236 
6237   // Next code should be emitted for reduction:
6238   //
6239   // static kmp_critical_name lock = { 0 };
6240   //
6241   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
6242   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
6243   //  ...
6244   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
6245   //  *(Type<n>-1*)rhs[<n>-1]);
6246   // }
6247   //
6248   // ...
6249   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
6250   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
6251   // RedList, reduce_func, &<lock>)) {
6252   // case 1:
6253   //  ...
6254   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
6255   //  ...
6256   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
6257   // break;
6258   // case 2:
6259   //  ...
6260   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
6261   //  ...
6262   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
6263   // break;
6264   // default:;
6265   // }
6266   //
6267   // if SimpleReduction is true, only the next code is generated:
6268   //  ...
6269   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
6270   //  ...
6271 
6272   ASTContext &C = CGM.getContext();
6273 
6274   if (SimpleReduction) {
6275     CodeGenFunction::RunCleanupsScope Scope(CGF);
6276     auto IPriv = Privates.begin();
6277     auto ILHS = LHSExprs.begin();
6278     auto IRHS = RHSExprs.begin();
6279     for (const Expr *E : ReductionOps) {
6280       emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
6281                                   cast<DeclRefExpr>(*IRHS));
6282       ++IPriv;
6283       ++ILHS;
6284       ++IRHS;
6285     }
6286     return;
6287   }
6288 
6289   // 1. Build a list of reduction variables.
6290   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
6291   auto Size = RHSExprs.size();
6292   for (const Expr *E : Privates) {
6293     if (E->getType()->isVariablyModifiedType())
6294       // Reserve place for array size.
6295       ++Size;
6296   }
6297   llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
6298   QualType ReductionArrayTy =
6299       C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
6300                              /*IndexTypeQuals=*/0);
6301   Address ReductionList =
6302       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
6303   auto IPriv = Privates.begin();
6304   unsigned Idx = 0;
6305   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
6306     Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
6307     CGF.Builder.CreateStore(
6308         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6309             CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
6310         Elem);
6311     if ((*IPriv)->getType()->isVariablyModifiedType()) {
6312       // Store array size.
6313       ++Idx;
6314       Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
6315       llvm::Value *Size = CGF.Builder.CreateIntCast(
6316           CGF.getVLASize(
6317                  CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
6318               .NumElts,
6319           CGF.SizeTy, /*isSigned=*/false);
6320       CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
6321                               Elem);
6322     }
6323   }
6324 
6325   // 2. Emit reduce_func().
6326   llvm::Function *ReductionFn = emitReductionFunction(
6327       Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
6328       LHSExprs, RHSExprs, ReductionOps);
6329 
6330   // 3. Create static kmp_critical_name lock = { 0 };
6331   std::string Name = getName({"reduction"});
6332   llvm::Value *Lock = getCriticalRegionLock(Name);
6333 
6334   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
6335   // RedList, reduce_func, &<lock>);
6336   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
6337   llvm::Value *ThreadId = getThreadID(CGF, Loc);
6338   llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
6339   llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6340       ReductionList.getPointer(), CGF.VoidPtrTy);
6341   llvm::Value *Args[] = {
6342       IdentTLoc,                             // ident_t *<loc>
6343       ThreadId,                              // i32 <gtid>
6344       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
6345       ReductionArrayTySize,                  // size_type sizeof(RedList)
6346       RL,                                    // void *RedList
6347       ReductionFn, // void (*) (void *, void *) <reduce_func>
6348       Lock         // kmp_critical_name *&<lock>
6349   };
6350   llvm::Value *Res = CGF.EmitRuntimeCall(
6351       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
6352                                        : OMPRTL__kmpc_reduce),
6353       Args);
6354 
6355   // 5. Build switch(res)
6356   llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
6357   llvm::SwitchInst *SwInst =
6358       CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
6359 
6360   // 6. Build case 1:
6361   //  ...
6362   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
6363   //  ...
6364   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
6365   // break;
6366   llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
6367   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
6368   CGF.EmitBlock(Case1BB);
6369 
6370   // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
6371   llvm::Value *EndArgs[] = {
6372       IdentTLoc, // ident_t *<loc>
6373       ThreadId,  // i32 <gtid>
6374       Lock       // kmp_critical_name *&<lock>
6375   };
6376   auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
6377                        CodeGenFunction &CGF, PrePostActionTy &Action) {
6378     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6379     auto IPriv = Privates.begin();
6380     auto ILHS = LHSExprs.begin();
6381     auto IRHS = RHSExprs.begin();
6382     for (const Expr *E : ReductionOps) {
6383       RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
6384                                      cast<DeclRefExpr>(*IRHS));
6385       ++IPriv;
6386       ++ILHS;
6387       ++IRHS;
6388     }
6389   };
6390   RegionCodeGenTy RCG(CodeGen);
6391   CommonActionTy Action(
6392       nullptr, llvm::None,
6393       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
6394                                        : OMPRTL__kmpc_end_reduce),
6395       EndArgs);
6396   RCG.setAction(Action);
6397   RCG(CGF);
6398 
6399   CGF.EmitBranch(DefaultBB);
6400 
6401   // 7. Build case 2:
6402   //  ...
6403   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
6404   //  ...
6405   // break;
6406   llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
6407   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
6408   CGF.EmitBlock(Case2BB);
6409 
6410   auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
6411                              CodeGenFunction &CGF, PrePostActionTy &Action) {
6412     auto ILHS = LHSExprs.begin();
6413     auto IRHS = RHSExprs.begin();
6414     auto IPriv = Privates.begin();
6415     for (const Expr *E : ReductionOps) {
6416       const Expr *XExpr = nullptr;
6417       const Expr *EExpr = nullptr;
6418       const Expr *UpExpr = nullptr;
6419       BinaryOperatorKind BO = BO_Comma;
6420       if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
6421         if (BO->getOpcode() == BO_Assign) {
6422           XExpr = BO->getLHS();
6423           UpExpr = BO->getRHS();
6424         }
6425       }
6426       // Try to emit update expression as a simple atomic.
6427       const Expr *RHSExpr = UpExpr;
6428       if (RHSExpr) {
6429         // Analyze RHS part of the whole expression.
6430         if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
6431                 RHSExpr->IgnoreParenImpCasts())) {
6432           // If this is a conditional operator, analyze its condition for
6433           // min/max reduction operator.
6434           RHSExpr = ACO->getCond();
6435         }
6436         if (const auto *BORHS =
6437                 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
6438           EExpr = BORHS->getRHS();
6439           BO = BORHS->getOpcode();
6440         }
6441       }
6442       if (XExpr) {
6443         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
6444         auto &&AtomicRedGen = [BO, VD,
6445                                Loc](CodeGenFunction &CGF, const Expr *XExpr,
6446                                     const Expr *EExpr, const Expr *UpExpr) {
6447           LValue X = CGF.EmitLValue(XExpr);
6448           RValue E;
6449           if (EExpr)
6450             E = CGF.EmitAnyExpr(EExpr);
6451           CGF.EmitOMPAtomicSimpleUpdateExpr(
6452               X, E, BO, /*IsXLHSInRHSPart=*/true,
6453               llvm::AtomicOrdering::Monotonic, Loc,
6454               [&CGF, UpExpr, VD, Loc](RValue XRValue) {
6455                 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6456                 PrivateScope.addPrivate(
6457                     VD, [&CGF, VD, XRValue, Loc]() {
6458                       Address LHSTemp = CGF.CreateMemTemp(VD->getType());
6459                       CGF.emitOMPSimpleStore(
6460                           CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
6461                           VD->getType().getNonReferenceType(), Loc);
6462                       return LHSTemp;
6463                     });
6464                 (void)PrivateScope.Privatize();
6465                 return CGF.EmitAnyExpr(UpExpr);
6466               });
6467         };
6468         if ((*IPriv)->getType()->isArrayType()) {
6469           // Emit atomic reduction for array section.
6470           const auto *RHSVar =
6471               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
6472           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
6473                                     AtomicRedGen, XExpr, EExpr, UpExpr);
6474         } else {
6475           // Emit atomic reduction for array subscript or single variable.
6476           AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
6477         }
6478       } else {
6479         // Emit as a critical region.
6480         auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
6481                                            const Expr *, const Expr *) {
6482           CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6483           std::string Name = RT.getName({"atomic_reduction"});
6484           RT.emitCriticalRegion(
6485               CGF, Name,
6486               [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
6487                 Action.Enter(CGF);
6488                 emitReductionCombiner(CGF, E);
6489               },
6490               Loc);
6491         };
6492         if ((*IPriv)->getType()->isArrayType()) {
6493           const auto *LHSVar =
6494               cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
6495           const auto *RHSVar =
6496               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
6497           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
6498                                     CritRedGen);
6499         } else {
6500           CritRedGen(CGF, nullptr, nullptr, nullptr);
6501         }
6502       }
6503       ++ILHS;
6504       ++IRHS;
6505       ++IPriv;
6506     }
6507   };
6508   RegionCodeGenTy AtomicRCG(AtomicCodeGen);
6509   if (!WithNowait) {
6510     // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
6511     llvm::Value *EndArgs[] = {
6512         IdentTLoc, // ident_t *<loc>
6513         ThreadId,  // i32 <gtid>
6514         Lock       // kmp_critical_name *&<lock>
6515     };
6516     CommonActionTy Action(nullptr, llvm::None,
6517                           createRuntimeFunction(OMPRTL__kmpc_end_reduce),
6518                           EndArgs);
6519     AtomicRCG.setAction(Action);
6520     AtomicRCG(CGF);
6521   } else {
6522     AtomicRCG(CGF);
6523   }
6524 
6525   CGF.EmitBranch(DefaultBB);
6526   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
6527 }
6528 
6529 /// Generates unique name for artificial threadprivate variables.
6530 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
6531 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
6532                                       const Expr *Ref) {
6533   SmallString<256> Buffer;
6534   llvm::raw_svector_ostream Out(Buffer);
6535   const clang::DeclRefExpr *DE;
6536   const VarDecl *D = ::getBaseDecl(Ref, DE);
6537   if (!D)
6538     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
6539   D = D->getCanonicalDecl();
6540   std::string Name = CGM.getOpenMPRuntime().getName(
6541       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
6542   Out << Prefix << Name << "_"
6543       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
6544   return std::string(Out.str());
6545 }
6546 
6547 /// Emits reduction initializer function:
6548 /// \code
6549 /// void @.red_init(void* %arg) {
6550 /// %0 = bitcast void* %arg to <type>*
6551 /// store <type> <init>, <type>* %0
6552 /// ret void
6553 /// }
6554 /// \endcode
6555 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
6556                                            SourceLocation Loc,
6557                                            ReductionCodeGen &RCG, unsigned N) {
6558   ASTContext &C = CGM.getContext();
6559   FunctionArgList Args;
6560   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6561                           ImplicitParamDecl::Other);
6562   Args.emplace_back(&Param);
6563   const auto &FnInfo =
6564       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6565   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6566   std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
6567   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6568                                     Name, &CGM.getModule());
6569   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6570   Fn->setDoesNotRecurse();
6571   CodeGenFunction CGF(CGM);
6572   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6573   Address PrivateAddr = CGF.EmitLoadOfPointer(
6574       CGF.GetAddrOfLocalVar(&Param),
6575       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6576   llvm::Value *Size = nullptr;
6577   // If the size of the reduction item is non-constant, load it from global
6578   // threadprivate variable.
6579   if (RCG.getSizes(N).second) {
6580     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6581         CGF, CGM.getContext().getSizeType(),
6582         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6583     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6584                                 CGM.getContext().getSizeType(), Loc);
6585   }
6586   RCG.emitAggregateType(CGF, N, Size);
6587   LValue SharedLVal;
6588   // If initializer uses initializer from declare reduction construct, emit a
6589   // pointer to the address of the original reduction item (reuired by reduction
6590   // initializer)
6591   if (RCG.usesReductionInitializer(N)) {
6592     Address SharedAddr =
6593         CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6594             CGF, CGM.getContext().VoidPtrTy,
6595             generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
6596     SharedAddr = CGF.EmitLoadOfPointer(
6597         SharedAddr,
6598         CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
6599     SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
6600   } else {
6601     SharedLVal = CGF.MakeNaturalAlignAddrLValue(
6602         llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
6603         CGM.getContext().VoidPtrTy);
6604   }
6605   // Emit the initializer:
6606   // %0 = bitcast void* %arg to <type>*
6607   // store <type> <init>, <type>* %0
6608   RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
6609                          [](CodeGenFunction &) { return false; });
6610   CGF.FinishFunction();
6611   return Fn;
6612 }
6613 
6614 /// Emits reduction combiner function:
6615 /// \code
6616 /// void @.red_comb(void* %arg0, void* %arg1) {
6617 /// %lhs = bitcast void* %arg0 to <type>*
6618 /// %rhs = bitcast void* %arg1 to <type>*
6619 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
6620 /// store <type> %2, <type>* %lhs
6621 /// ret void
6622 /// }
6623 /// \endcode
6624 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
6625                                            SourceLocation Loc,
6626                                            ReductionCodeGen &RCG, unsigned N,
6627                                            const Expr *ReductionOp,
6628                                            const Expr *LHS, const Expr *RHS,
6629                                            const Expr *PrivateRef) {
6630   ASTContext &C = CGM.getContext();
6631   const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
6632   const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
6633   FunctionArgList Args;
6634   ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
6635                                C.VoidPtrTy, ImplicitParamDecl::Other);
6636   ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6637                             ImplicitParamDecl::Other);
6638   Args.emplace_back(&ParamInOut);
6639   Args.emplace_back(&ParamIn);
6640   const auto &FnInfo =
6641       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6642   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6643   std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
6644   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6645                                     Name, &CGM.getModule());
6646   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6647   Fn->setDoesNotRecurse();
6648   CodeGenFunction CGF(CGM);
6649   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6650   llvm::Value *Size = nullptr;
6651   // If the size of the reduction item is non-constant, load it from global
6652   // threadprivate variable.
6653   if (RCG.getSizes(N).second) {
6654     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6655         CGF, CGM.getContext().getSizeType(),
6656         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6657     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6658                                 CGM.getContext().getSizeType(), Loc);
6659   }
6660   RCG.emitAggregateType(CGF, N, Size);
6661   // Remap lhs and rhs variables to the addresses of the function arguments.
6662   // %lhs = bitcast void* %arg0 to <type>*
6663   // %rhs = bitcast void* %arg1 to <type>*
6664   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6665   PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
6666     // Pull out the pointer to the variable.
6667     Address PtrAddr = CGF.EmitLoadOfPointer(
6668         CGF.GetAddrOfLocalVar(&ParamInOut),
6669         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6670     return CGF.Builder.CreateElementBitCast(
6671         PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
6672   });
6673   PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
6674     // Pull out the pointer to the variable.
6675     Address PtrAddr = CGF.EmitLoadOfPointer(
6676         CGF.GetAddrOfLocalVar(&ParamIn),
6677         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6678     return CGF.Builder.CreateElementBitCast(
6679         PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
6680   });
6681   PrivateScope.Privatize();
6682   // Emit the combiner body:
6683   // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
6684   // store <type> %2, <type>* %lhs
6685   CGM.getOpenMPRuntime().emitSingleReductionCombiner(
6686       CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
6687       cast<DeclRefExpr>(RHS));
6688   CGF.FinishFunction();
6689   return Fn;
6690 }
6691 
6692 /// Emits reduction finalizer function:
6693 /// \code
6694 /// void @.red_fini(void* %arg) {
6695 /// %0 = bitcast void* %arg to <type>*
6696 /// <destroy>(<type>* %0)
6697 /// ret void
6698 /// }
6699 /// \endcode
6700 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
6701                                            SourceLocation Loc,
6702                                            ReductionCodeGen &RCG, unsigned N) {
6703   if (!RCG.needCleanups(N))
6704     return nullptr;
6705   ASTContext &C = CGM.getContext();
6706   FunctionArgList Args;
6707   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6708                           ImplicitParamDecl::Other);
6709   Args.emplace_back(&Param);
6710   const auto &FnInfo =
6711       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6712   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6713   std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
6714   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6715                                     Name, &CGM.getModule());
6716   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6717   Fn->setDoesNotRecurse();
6718   CodeGenFunction CGF(CGM);
6719   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6720   Address PrivateAddr = CGF.EmitLoadOfPointer(
6721       CGF.GetAddrOfLocalVar(&Param),
6722       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6723   llvm::Value *Size = nullptr;
6724   // If the size of the reduction item is non-constant, load it from global
6725   // threadprivate variable.
6726   if (RCG.getSizes(N).second) {
6727     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6728         CGF, CGM.getContext().getSizeType(),
6729         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6730     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6731                                 CGM.getContext().getSizeType(), Loc);
6732   }
6733   RCG.emitAggregateType(CGF, N, Size);
6734   // Emit the finalizer body:
6735   // <destroy>(<type>* %0)
6736   RCG.emitCleanups(CGF, N, PrivateAddr);
6737   CGF.FinishFunction(Loc);
6738   return Fn;
6739 }
6740 
/// Emits the setup for task reductions: fills a local array with one
/// kmp_task_red_input_t descriptor per entry of Data.ReductionVars (shared
/// item address, size, init/fini/comb routines and flags) and passes it to
/// __kmpc_task_reduction_init.
///
/// \param LHSExprs, RHSExprs Per-item helper expressions forwarded to the
/// generated combiner functions; indexed in parallel with Data.ReductionVars.
/// \return The result of the __kmpc_task_reduction_init call, or nullptr if
/// there is no insertion point or no reduction variables.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_task_red_input {
  //   void *reduce_shar; // shared reduction item
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_task_red_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  // The flags field is modelled as an unsigned 32-bit integer.
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  // One descriptor per reduction variable.
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
                       Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    RCG.emitAggregateType(CGF, Cnt);
    // getSizes yields a pair: the first value is stored below as the item
    // size, the second is non-null only for items that need delayed creation
    // because of their size (see the comment below).
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs, array sections and
    // custom reduction initializations. It is required because runtime does not
    // provide the way to pass the sizes of VLAs/array sections to
    // initializer/combiner/finalizer functions and does not pass the pointer to
    // original reduction item to the initializer. Instead threadprivate global
    // variables are used to store these values and use them in the functions.
    bool DelayedCreation = !!SizeVal;
    // ElemLVal.reduce_size = size (converted to size_t as an unsigned value);
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // A custom reduction initializer also forces delayed creation.
    DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
    // ElemLVal.reduce_fini = fini; (null when no finalizer is generated)
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = DelayedCreation ? 1 : 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
}
6846 
6847 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6848                                               SourceLocation Loc,
6849                                               ReductionCodeGen &RCG,
6850                                               unsigned N) {
6851   auto Sizes = RCG.getSizes(N);
6852   // Emit threadprivate global variable if the type is non-constant
6853   // (Sizes.second = nullptr).
6854   if (Sizes.second) {
6855     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6856                                                      /*isSigned=*/false);
6857     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6858         CGF, CGM.getContext().getSizeType(),
6859         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6860     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6861   }
6862   // Store address of the original reduction item if custom initializer is used.
6863   if (RCG.usesReductionInitializer(N)) {
6864     Address SharedAddr = getAddrOfArtificialThreadPrivate(
6865         CGF, CGM.getContext().VoidPtrTy,
6866         generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
6867     CGF.Builder.CreateStore(
6868         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6869             RCG.getSharedLValue(N).getPointer(CGF), CGM.VoidPtrTy),
6870         SharedAddr, /*IsVolatile=*/false);
6871   }
6872 }
6873 
6874 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6875                                               SourceLocation Loc,
6876                                               llvm::Value *ReductionsPtr,
6877                                               LValue SharedLVal) {
6878   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6879   // *d);
6880   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6881                                                    CGM.IntTy,
6882                                                    /*isSigned=*/true),
6883                          ReductionsPtr,
6884                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6885                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6886   return Address(
6887       CGF.EmitRuntimeCall(
6888           createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
6889       SharedLVal.getAlignment());
6890 }
6891 
6892 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6893                                        SourceLocation Loc) {
6894   if (!CGF.HaveInsertPoint())
6895     return;
6896 
6897   llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
6898   if (OMPBuilder) {
6899     OMPBuilder->CreateTaskwait(CGF.Builder);
6900   } else {
6901     // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6902     // global_tid);
6903     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6904     // Ignore return result until untied tasks are supported.
6905     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
6906   }
6907 
6908   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6909     Region->emitUntiedSwitch(CGF);
6910 }
6911 
/// Emits the body of an OpenMP directive inline in the current function.
/// \param InnerKind Kind of the directive being emitted; forwarded to the
/// region RAII object.
/// \param CodeGen Code generation callback for the region; forwarded to the
/// region RAII object.
/// \param HasCancel Forwarded to the region RAII object.
/// Does nothing when \p CGF has no insertion point.
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  // NOTE(review): InlinedOpenMPRegionRAII is defined elsewhere in this file;
  // presumably it installs the inlined-region info as CGF.CapturedStmtInfo
  // for the lifetime of 'Region' - confirm against its definition.
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}
6921 
namespace {
/// Cancellation kinds passed as the 'cncl_kind' argument of the
/// __kmpc_cancel and __kmpc_cancellationpoint runtime calls.
/// NOTE(review): the numeric values presumably have to match the encoding
/// expected by the OpenMP runtime library - do not renumber without checking.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace
6931 
6932 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6933   RTCancelKind CancelKind = CancelNoreq;
6934   if (CancelRegion == OMPD_parallel)
6935     CancelKind = CancelParallel;
6936   else if (CancelRegion == OMPD_for)
6937     CancelKind = CancelLoop;
6938   else if (CancelRegion == OMPD_sections)
6939     CancelKind = CancelSections;
6940   else {
6941     assert(CancelRegion == OMPD_taskgroup);
6942     CancelKind = CancelTaskgroup;
6943   }
6944   return CancelKind;
6945 }
6946 
6947 void CGOpenMPRuntime::emitCancellationPointCall(
6948     CodeGenFunction &CGF, SourceLocation Loc,
6949     OpenMPDirectiveKind CancelRegion) {
6950   if (!CGF.HaveInsertPoint())
6951     return;
6952   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6953   // global_tid, kmp_int32 cncl_kind);
6954   if (auto *OMPRegionInfo =
6955           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6956     // For 'cancellation point taskgroup', the task region info may not have a
6957     // cancel. This may instead happen in another adjacent task.
6958     if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6959       llvm::Value *Args[] = {
6960           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6961           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6962       // Ignore return result until untied tasks are supported.
6963       llvm::Value *Result = CGF.EmitRuntimeCall(
6964           createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
6965       // if (__kmpc_cancellationpoint()) {
6966       //   exit from construct;
6967       // }
6968       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6969       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6970       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6971       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6972       CGF.EmitBlock(ExitBB);
6973       // exit from construct;
6974       CodeGenFunction::JumpDest CancelDest =
6975           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6976       CGF.EmitBranchThroughCleanup(CancelDest);
6977       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6978     }
6979   }
6980 }
6981 
6982 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6983                                      const Expr *IfCond,
6984                                      OpenMPDirectiveKind CancelRegion) {
6985   if (!CGF.HaveInsertPoint())
6986     return;
6987   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6988   // kmp_int32 cncl_kind);
6989   if (auto *OMPRegionInfo =
6990           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6991     auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
6992                                                         PrePostActionTy &) {
6993       CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6994       llvm::Value *Args[] = {
6995           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6996           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6997       // Ignore return result until untied tasks are supported.
6998       llvm::Value *Result = CGF.EmitRuntimeCall(
6999           RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
7000       // if (__kmpc_cancel()) {
7001       //   exit from construct;
7002       // }
7003       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
7004       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
7005       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
7006       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
7007       CGF.EmitBlock(ExitBB);
7008       // exit from construct;
7009       CodeGenFunction::JumpDest CancelDest =
7010           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
7011       CGF.EmitBranchThroughCleanup(CancelDest);
7012       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
7013     };
7014     if (IfCond) {
7015       emitIfClause(CGF, IfCond, ThenGen,
7016                    [](CodeGenFunction &, PrePostActionTy &) {});
7017     } else {
7018       RegionCodeGenTy ThenRCG(ThenGen);
7019       ThenRCG(CGF);
7020     }
7021   }
7022 }
7023 
/// Emits the outlined function for the target directive \p D by delegating to
/// emitTargetOutlinedFunctionHelper, and records that this runtime object has
/// emitted a target region.
/// \param ParentName Name of the function enclosing the target region; must
/// not be empty.
/// \param OutlinedFn [out] Receives the outlined function.
/// \param OutlinedFnID [out] Receives the ID of the outlined function (only
/// set by the helper when \p IsOffloadEntry is true).
/// \param IsOffloadEntry True if the outlined function is an offload entry.
/// \param CodeGen Code generation callback for the region body.
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  // Set before the helper runs, i.e. even if code generation below bails out.
  HasEmittedTargetRegion = true;
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}
7033 
7034 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
7035     const OMPExecutableDirective &D, StringRef ParentName,
7036     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
7037     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
7038   // Create a unique name for the entry function using the source location
7039   // information of the current target region. The name will be something like:
7040   //
7041   // __omp_offloading_DD_FFFF_PP_lBB
7042   //
7043   // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
7044   // mangled name of the function that encloses the target region and BB is the
7045   // line number of the target region.
7046 
7047   unsigned DeviceID;
7048   unsigned FileID;
7049   unsigned Line;
7050   getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
7051                            Line);
7052   SmallString<64> EntryFnName;
7053   {
7054     llvm::raw_svector_ostream OS(EntryFnName);
7055     OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
7056        << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
7057   }
7058 
7059   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
7060 
7061   CodeGenFunction CGF(CGM, true);
7062   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
7063   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
7064 
7065   OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
7066 
7067   // If this target outline function is not an offload entry, we don't need to
7068   // register it.
7069   if (!IsOffloadEntry)
7070     return;
7071 
7072   // The target region ID is used by the runtime library to identify the current
7073   // target region, so it only has to be unique and not necessarily point to
7074   // anything. It could be the pointer to the outlined function that implements
7075   // the target region, but we aren't using that so that the compiler doesn't
7076   // need to keep that, and could therefore inline the host function if proven
7077   // worthwhile during optimization. In the other hand, if emitting code for the
7078   // device, the ID has to be the function address so that it can retrieved from
7079   // the offloading entry and launched by the runtime library. We also mark the
7080   // outlined function to have external linkage in case we are emitting code for
7081   // the device, because these functions will be entry points to the device.
7082 
7083   if (CGM.getLangOpts().OpenMPIsDevice) {
7084     OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
7085     OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
7086     OutlinedFn->setDSOLocal(false);
7087   } else {
7088     std::string Name = getName({EntryFnName, "region_id"});
7089     OutlinedFnID = new llvm::GlobalVariable(
7090         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
7091         llvm::GlobalValue::WeakAnyLinkage,
7092         llvm::Constant::getNullValue(CGM.Int8Ty), Name);
7093   }
7094 
7095   // Register the information for the entry associated with this target region.
7096   OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
7097       DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
7098       OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
7099 }
7100 
7101 /// Checks if the expression is constant or does not have non-trivial function
7102 /// calls.
7103 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
7104   // We can skip constant expressions.
7105   // We can skip expressions with trivial calls or simple expressions.
7106   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
7107           !E->hasNonTrivialCall(Ctx)) &&
7108          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
7109 }
7110 
7111 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
7112                                                     const Stmt *Body) {
7113   const Stmt *Child = Body->IgnoreContainers();
7114   while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
7115     Child = nullptr;
7116     for (const Stmt *S : C->body()) {
7117       if (const auto *E = dyn_cast<Expr>(S)) {
7118         if (isTrivial(Ctx, E))
7119           continue;
7120       }
7121       // Some of the statements can be ignored.
7122       if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
7123           isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
7124         continue;
7125       // Analyze declarations.
7126       if (const auto *DS = dyn_cast<DeclStmt>(S)) {
7127         if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
7128               if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
7129                   isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
7130                   isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
7131                   isa<UsingDirectiveDecl>(D) ||
7132                   isa<OMPDeclareReductionDecl>(D) ||
7133                   isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
7134                 return true;
7135               const auto *VD = dyn_cast<VarDecl>(D);
7136               if (!VD)
7137                 return false;
7138               return VD->isConstexpr() ||
7139                      ((VD->getType().isTrivialType(Ctx) ||
7140                        VD->getType()->isReferenceType()) &&
7141                       (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
7142             }))
7143           continue;
7144       }
7145       // Found multiple children - cannot get the one child only.
7146       if (Child)
7147         return nullptr;
7148       Child = S;
7149     }
7150     if (Child)
7151       Child = Child->IgnoreContainers();
7152   }
7153   return Child;
7154 }
7155 
/// Emit the number of teams for a target directive (host code generation
/// only).  Inspect the num_teams clause associated with a teams construct
/// combined or closely nested with the target directive.
///
/// - 'target': the single meaningful child of the region is examined. A
///   nested teams directive yields its num_teams expression (cast to a signed
///   32-bit integer) or 0 if it has no such clause; a nested parallel or simd
///   directive yields 1; any other nested directive yields 0; if there is no
///   single nested directive, nullptr is returned.
/// - combined 'target teams ...' directives: the num_teams expression (cast
///   to a signed 32-bit integer), or 0 if the clause is absent.
/// - 'target parallel ...' and 'target simd': a team of size one, as these
///   have no associated teams construct.
///
/// Any other directive kind is unexpected here and hits llvm_unreachable.
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Look for a directive that is the only meaningful statement nested in
    // the target region.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          // The num_teams expression belongs to the nested directive; emit it
          // with the inner-expression captured statement info installed.
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        // Teams directive without a num_teams clause.
        return Bld.getInt32(0);
      }
      // Nested parallel/simd without teams: a single team.
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      return Bld.getInt32(0);
    }
    // No single nested directive was found.
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      // Cleanups created while emitting the expression are run when this
      // scope ends.
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  // All remaining kinds are listed explicitly and fall through to the
  // llvm_unreachable below.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
7286 
7287 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
7288                                   llvm::Value *DefaultThreadLimitVal) {
7289   const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
7290       CGF.getContext(), CS->getCapturedStmt());
7291   if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
7292     if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
7293       llvm::Value *NumThreads = nullptr;
7294       llvm::Value *CondVal = nullptr;
7295       // Handle if clause. If if clause present, the number of threads is
7296       // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
7297       if (Dir->hasClausesOfKind<OMPIfClause>()) {
7298         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
7299         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
7300         const OMPIfClause *IfClause = nullptr;
7301         for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
7302           if (C->getNameModifier() == OMPD_unknown ||
7303               C->getNameModifier() == OMPD_parallel) {
7304             IfClause = C;
7305             break;
7306           }
7307         }
7308         if (IfClause) {
7309           const Expr *Cond = IfClause->getCondition();
7310           bool Result;
7311           if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
7312             if (!Result)
7313               return CGF.Builder.getInt32(1);
7314           } else {
7315             CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
7316             if (const auto *PreInit =
7317                     cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
7318               for (const auto *I : PreInit->decls()) {
7319                 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
7320                   CGF.EmitVarDecl(cast<VarDecl>(*I));
7321                 } else {
7322                   CodeGenFunction::AutoVarEmission Emission =
7323                       CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
7324                   CGF.EmitAutoVarCleanups(Emission);
7325                 }
7326               }
7327             }
7328             CondVal = CGF.EvaluateExprAsBool(Cond);
7329           }
7330         }
7331       }
7332       // Check the value of num_threads clause iff if clause was not specified
7333       // or is not evaluated to false.
7334       if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
7335         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
7336         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
7337         const auto *NumThreadsClause =
7338             Dir->getSingleClause<OMPNumThreadsClause>();
7339         CodeGenFunction::LexicalScope Scope(
7340             CGF, NumThreadsClause->getNumThreads()->getSourceRange());
7341         if (const auto *PreInit =
7342                 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
7343           for (const auto *I : PreInit->decls()) {
7344             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
7345               CGF.EmitVarDecl(cast<VarDecl>(*I));
7346             } else {
7347               CodeGenFunction::AutoVarEmission Emission =
7348                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
7349               CGF.EmitAutoVarCleanups(Emission);
7350             }
7351           }
7352         }
7353         NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
7354         NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
7355                                                /*isSigned=*/false);
7356         if (DefaultThreadLimitVal)
7357           NumThreads = CGF.Builder.CreateSelect(
7358               CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
7359               DefaultThreadLimitVal, NumThreads);
7360       } else {
7361         NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
7362                                            : CGF.Builder.getInt32(0);
7363       }
7364       // Process condition of the if clause.
7365       if (CondVal) {
7366         NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
7367                                               CGF.Builder.getInt32(1));
7368       }
7369       return NumThreads;
7370     }
7371     if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
7372       return CGF.Builder.getInt32(1);
7373     return DefaultThreadLimitVal;
7374   }
7375   return DefaultThreadLimitVal ? DefaultThreadLimitVal
7376                                : CGF.Builder.getInt32(0);
7377 }
7378 
7379 /// Emit the number of threads for a target directive.  Inspect the
7380 /// thread_limit clause associated with a teams construct combined or closely
7381 /// nested with the target directive.
7382 ///
7383 /// Emit the num_threads clause for directives such as 'target parallel' that
7384 /// have no associated teams construct.
7385 ///
7386 /// Otherwise, return nullptr.
7387 static llvm::Value *
7388 emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
7389                                  const OMPExecutableDirective &D) {
7390   assert(!CGF.getLangOpts().OpenMPIsDevice &&
7391          "Clauses associated with the teams directive expected to be emitted "
7392          "only for the host!");
7393   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
7394   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
7395          "Expected target-based executable directive.");
7396   CGBuilderTy &Bld = CGF.Builder;
7397   llvm::Value *ThreadLimitVal = nullptr;
7398   llvm::Value *NumThreadsVal = nullptr;
7399   switch (DirectiveKind) {
7400   case OMPD_target: {
7401     const CapturedStmt *CS = D.getInnermostCapturedStmt();
7402     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7403       return NumThreads;
7404     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
7405         CGF.getContext(), CS->getCapturedStmt());
7406     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
7407       if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
7408         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
7409         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
7410         const auto *ThreadLimitClause =
7411             Dir->getSingleClause<OMPThreadLimitClause>();
7412         CodeGenFunction::LexicalScope Scope(
7413             CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
7414         if (const auto *PreInit =
7415                 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
7416           for (const auto *I : PreInit->decls()) {
7417             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
7418               CGF.EmitVarDecl(cast<VarDecl>(*I));
7419             } else {
7420               CodeGenFunction::AutoVarEmission Emission =
7421                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
7422               CGF.EmitAutoVarCleanups(Emission);
7423             }
7424           }
7425         }
7426         llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7427             ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7428         ThreadLimitVal =
7429             Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7430       }
7431       if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
7432           !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
7433         CS = Dir->getInnermostCapturedStmt();
7434         const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
7435             CGF.getContext(), CS->getCapturedStmt());
7436         Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
7437       }
7438       if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
7439           !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
7440         CS = Dir->getInnermostCapturedStmt();
7441         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7442           return NumThreads;
7443       }
7444       if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
7445         return Bld.getInt32(1);
7446     }
7447     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
7448   }
7449   case OMPD_target_teams: {
7450     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7451       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7452       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7453       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7454           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7455       ThreadLimitVal =
7456           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7457     }
7458     const CapturedStmt *CS = D.getInnermostCapturedStmt();
7459     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7460       return NumThreads;
7461     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
7462         CGF.getContext(), CS->getCapturedStmt());
7463     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
7464       if (Dir->getDirectiveKind() == OMPD_distribute) {
7465         CS = Dir->getInnermostCapturedStmt();
7466         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7467           return NumThreads;
7468       }
7469     }
7470     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
7471   }
7472   case OMPD_target_teams_distribute:
7473     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7474       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7475       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7476       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7477           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7478       ThreadLimitVal =
7479           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7480     }
7481     return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
7482   case OMPD_target_parallel:
7483   case OMPD_target_parallel_for:
7484   case OMPD_target_parallel_for_simd:
7485   case OMPD_target_teams_distribute_parallel_for:
7486   case OMPD_target_teams_distribute_parallel_for_simd: {
7487     llvm::Value *CondVal = nullptr;
7488     // Handle if clause. If if clause present, the number of threads is
7489     // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
7490     if (D.hasClausesOfKind<OMPIfClause>()) {
7491       const OMPIfClause *IfClause = nullptr;
7492       for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
7493         if (C->getNameModifier() == OMPD_unknown ||
7494             C->getNameModifier() == OMPD_parallel) {
7495           IfClause = C;
7496           break;
7497         }
7498       }
7499       if (IfClause) {
7500         const Expr *Cond = IfClause->getCondition();
7501         bool Result;
7502         if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
7503           if (!Result)
7504             return Bld.getInt32(1);
7505         } else {
7506           CodeGenFunction::RunCleanupsScope Scope(CGF);
7507           CondVal = CGF.EvaluateExprAsBool(Cond);
7508         }
7509       }
7510     }
7511     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7512       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7513       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7514       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7515           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7516       ThreadLimitVal =
7517           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7518     }
7519     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
7520       CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
7521       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
7522       llvm::Value *NumThreads = CGF.EmitScalarExpr(
7523           NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
7524       NumThreadsVal =
7525           Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
7526       ThreadLimitVal = ThreadLimitVal
7527                            ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
7528                                                                 ThreadLimitVal),
7529                                               NumThreadsVal, ThreadLimitVal)
7530                            : NumThreadsVal;
7531     }
7532     if (!ThreadLimitVal)
7533       ThreadLimitVal = Bld.getInt32(0);
7534     if (CondVal)
7535       return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
7536     return ThreadLimitVal;
7537   }
7538   case OMPD_target_teams_distribute_simd:
7539   case OMPD_target_simd:
7540     return Bld.getInt32(1);
7541   case OMPD_parallel:
7542   case OMPD_for:
7543   case OMPD_parallel_for:
7544   case OMPD_parallel_master:
7545   case OMPD_parallel_sections:
7546   case OMPD_for_simd:
7547   case OMPD_parallel_for_simd:
7548   case OMPD_cancel:
7549   case OMPD_cancellation_point:
7550   case OMPD_ordered:
7551   case OMPD_threadprivate:
7552   case OMPD_allocate:
7553   case OMPD_task:
7554   case OMPD_simd:
7555   case OMPD_sections:
7556   case OMPD_section:
7557   case OMPD_single:
7558   case OMPD_master:
7559   case OMPD_critical:
7560   case OMPD_taskyield:
7561   case OMPD_barrier:
7562   case OMPD_taskwait:
7563   case OMPD_taskgroup:
7564   case OMPD_atomic:
7565   case OMPD_flush:
7566   case OMPD_depobj:
7567   case OMPD_scan:
7568   case OMPD_teams:
7569   case OMPD_target_data:
7570   case OMPD_target_exit_data:
7571   case OMPD_target_enter_data:
7572   case OMPD_distribute:
7573   case OMPD_distribute_simd:
7574   case OMPD_distribute_parallel_for:
7575   case OMPD_distribute_parallel_for_simd:
7576   case OMPD_teams_distribute:
7577   case OMPD_teams_distribute_simd:
7578   case OMPD_teams_distribute_parallel_for:
7579   case OMPD_teams_distribute_parallel_for_simd:
7580   case OMPD_target_update:
7581   case OMPD_declare_simd:
7582   case OMPD_declare_variant:
7583   case OMPD_begin_declare_variant:
7584   case OMPD_end_declare_variant:
7585   case OMPD_declare_target:
7586   case OMPD_end_declare_target:
7587   case OMPD_declare_reduction:
7588   case OMPD_declare_mapper:
7589   case OMPD_taskloop:
7590   case OMPD_taskloop_simd:
7591   case OMPD_master_taskloop:
7592   case OMPD_master_taskloop_simd:
7593   case OMPD_parallel_master_taskloop:
7594   case OMPD_parallel_master_taskloop_simd:
7595   case OMPD_requires:
7596   case OMPD_unknown:
7597     break;
7598   }
7599   llvm_unreachable("Unsupported directive kind.");
7600 }
7601 
7602 namespace {
7603 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7604 
7605 // Utility to handle information from clauses associated with a given
7606 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7607 // It provides a convenient interface to obtain the information and generate
7608 // code for that information.
7609 class MappableExprsHandler {
7610 public:
7611   /// Values for bit flags used to specify the mapping type for
7612   /// offloading.
7613   enum OpenMPOffloadMappingFlags : uint64_t {
7614     /// No flags
7615     OMP_MAP_NONE = 0x0,
7616     /// Allocate memory on the device and move data from host to device.
7617     OMP_MAP_TO = 0x01,
7618     /// Allocate memory on the device and move data from device to host.
7619     OMP_MAP_FROM = 0x02,
7620     /// Always perform the requested mapping action on the element, even
7621     /// if it was already mapped before.
7622     OMP_MAP_ALWAYS = 0x04,
7623     /// Delete the element from the device environment, ignoring the
7624     /// current reference count associated with the element.
7625     OMP_MAP_DELETE = 0x08,
7626     /// The element being mapped is a pointer-pointee pair; both the
7627     /// pointer and the pointee should be mapped.
7628     OMP_MAP_PTR_AND_OBJ = 0x10,
7629     /// This flags signals that the base address of an entry should be
7630     /// passed to the target kernel as an argument.
7631     OMP_MAP_TARGET_PARAM = 0x20,
7632     /// Signal that the runtime library has to return the device pointer
7633     /// in the current position for the data being mapped. Used when we have the
7634     /// use_device_ptr clause.
7635     OMP_MAP_RETURN_PARAM = 0x40,
7636     /// This flag signals that the reference being passed is a pointer to
7637     /// private data.
7638     OMP_MAP_PRIVATE = 0x80,
7639     /// Pass the element to the device by value.
7640     OMP_MAP_LITERAL = 0x100,
7641     /// Implicit map
7642     OMP_MAP_IMPLICIT = 0x200,
7643     /// Close is a hint to the runtime to allocate memory close to
7644     /// the target device.
7645     OMP_MAP_CLOSE = 0x400,
7646     /// The 16 MSBs of the flags indicate whether the entry is member of some
7647     /// struct/class.
7648     OMP_MAP_MEMBER_OF = 0xffff000000000000,
7649     LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
7650   };
7651 
7652   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7653   static unsigned getFlagMemberOffset() {
7654     unsigned Offset = 0;
7655     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7656          Remain = Remain >> 1)
7657       Offset++;
7658     return Offset;
7659   }
7660 
7661   /// Class that associates information with a base pointer to be passed to the
7662   /// runtime library.
7663   class BasePointerInfo {
7664     /// The base pointer.
7665     llvm::Value *Ptr = nullptr;
7666     /// The base declaration that refers to this device pointer, or null if
7667     /// there is none.
7668     const ValueDecl *DevPtrDecl = nullptr;
7669 
7670   public:
7671     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7672         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7673     llvm::Value *operator*() const { return Ptr; }
7674     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7675     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7676   };
7677 
7678   using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
7679   using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
7680   using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
7681 
7682   /// Map between a struct and the its lowest & highest elements which have been
7683   /// mapped.
7684   /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
7685   ///                    HE(FieldIndex, Pointer)}
7686   struct StructRangeInfoTy {
7687     std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
7688         0, Address::invalid()};
7689     std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
7690         0, Address::invalid()};
7691     Address Base = Address::invalid();
7692   };
7693 
7694 private:
7695   /// Kind that defines how a device pointer has to be returned.
7696   struct MapInfo {
7697     OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
7698     OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
7699     ArrayRef<OpenMPMapModifierKind> MapModifiers;
7700     bool ReturnDevicePointer = false;
7701     bool IsImplicit = false;
7702 
7703     MapInfo() = default;
7704     MapInfo(
7705         OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7706         OpenMPMapClauseKind MapType,
7707         ArrayRef<OpenMPMapModifierKind> MapModifiers,
7708         bool ReturnDevicePointer, bool IsImplicit)
7709         : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
7710           ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
7711   };
7712 
7713   /// If use_device_ptr is used on a pointer which is a struct member and there
7714   /// is no map information about it, then emission of that entry is deferred
7715   /// until the whole struct has been processed.
7716   struct DeferredDevicePtrEntryTy {
7717     const Expr *IE = nullptr;
7718     const ValueDecl *VD = nullptr;
7719 
7720     DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
7721         : IE(IE), VD(VD) {}
7722   };
7723 
  /// The target directive from which the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
7726   llvm::PointerUnion<const OMPExecutableDirective *,
7727                      const OMPDeclareMapperDecl *>
7728       CurDir;
7729 
7730   /// Function the directive is being generated for.
7731   CodeGenFunction &CGF;
7732 
7733   /// Set of all first private variables in the current directive.
7734   /// bool data is set to true if the variable is implicitly marked as
7735   /// firstprivate, false otherwise.
7736   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
7737 
7738   /// Map between device pointer declarations and their expression components.
7739   /// The key value for declarations in 'this' is null.
7740   llvm::DenseMap<
7741       const ValueDecl *,
7742       SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7743       DevPointersMap;
7744 
7745   llvm::Value *getExprTypeSize(const Expr *E) const {
7746     QualType ExprTy = E->getType().getCanonicalType();
7747 
7748     // Calculate the size for array shaping expression.
7749     if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7750       llvm::Value *Size =
7751           CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7752       for (const Expr *SE : OAE->getDimensions()) {
7753         llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7754         Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7755                                       CGF.getContext().getSizeType(),
7756                                       SE->getExprLoc());
7757         Size = CGF.Builder.CreateNUWMul(Size, Sz);
7758       }
7759       return Size;
7760     }
7761 
7762     // Reference types are ignored for mapping purposes.
7763     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7764       ExprTy = RefTy->getPointeeType().getCanonicalType();
7765 
7766     // Given that an array section is considered a built-in type, we need to
7767     // do the calculation based on the length of the section instead of relying
7768     // on CGF.getTypeSize(E->getType()).
7769     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7770       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7771                             OAE->getBase()->IgnoreParenImpCasts())
7772                             .getCanonicalType();
7773 
7774       // If there is no length associated with the expression and lower bound is
7775       // not specified too, that means we are using the whole length of the
7776       // base.
7777       if (!OAE->getLength() && OAE->getColonLoc().isValid() &&
7778           !OAE->getLowerBound())
7779         return CGF.getTypeSize(BaseTy);
7780 
7781       llvm::Value *ElemSize;
7782       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7783         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7784       } else {
7785         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7786         assert(ATy && "Expecting array type if not a pointer type.");
7787         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7788       }
7789 
7790       // If we don't have a length at this point, that is because we have an
7791       // array section with a single element.
7792       if (!OAE->getLength() && OAE->getColonLoc().isInvalid())
7793         return ElemSize;
7794 
7795       if (const Expr *LenExpr = OAE->getLength()) {
7796         llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7797         LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7798                                              CGF.getContext().getSizeType(),
7799                                              LenExpr->getExprLoc());
7800         return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7801       }
7802       assert(!OAE->getLength() && OAE->getColonLoc().isValid() &&
7803              OAE->getLowerBound() && "expected array_section[lb:].");
7804       // Size = sizetype - lb * elemtype;
7805       llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7806       llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7807       LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7808                                        CGF.getContext().getSizeType(),
7809                                        OAE->getLowerBound()->getExprLoc());
7810       LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7811       llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7812       llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7813       LengthVal = CGF.Builder.CreateSelect(
7814           Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7815       return LengthVal;
7816     }
7817     return CGF.getTypeSize(ExprTy);
7818   }
7819 
7820   /// Return the corresponding bits for a given map clause modifier. Add
7821   /// a flag marking the map as a pointer if requested. Add a flag marking the
7822   /// map as the first one of a series of maps that relate to the same map
7823   /// expression.
7824   OpenMPOffloadMappingFlags getMapTypeBits(
7825       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7826       bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
7827     OpenMPOffloadMappingFlags Bits =
7828         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7829     switch (MapType) {
7830     case OMPC_MAP_alloc:
7831     case OMPC_MAP_release:
7832       // alloc and release is the default behavior in the runtime library,  i.e.
7833       // if we don't pass any bits alloc/release that is what the runtime is
7834       // going to do. Therefore, we don't need to signal anything for these two
7835       // type modifiers.
7836       break;
7837     case OMPC_MAP_to:
7838       Bits |= OMP_MAP_TO;
7839       break;
7840     case OMPC_MAP_from:
7841       Bits |= OMP_MAP_FROM;
7842       break;
7843     case OMPC_MAP_tofrom:
7844       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7845       break;
7846     case OMPC_MAP_delete:
7847       Bits |= OMP_MAP_DELETE;
7848       break;
7849     case OMPC_MAP_unknown:
7850       llvm_unreachable("Unexpected map type!");
7851     }
7852     if (AddPtrFlag)
7853       Bits |= OMP_MAP_PTR_AND_OBJ;
7854     if (AddIsTargetParamFlag)
7855       Bits |= OMP_MAP_TARGET_PARAM;
7856     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7857         != MapModifiers.end())
7858       Bits |= OMP_MAP_ALWAYS;
7859     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7860         != MapModifiers.end())
7861       Bits |= OMP_MAP_CLOSE;
7862     return Bits;
7863   }
7864 
7865   /// Return true if the provided expression is a final array section. A
7866   /// final array section, is one whose length can't be proved to be one.
7867   bool isFinalArraySectionExpression(const Expr *E) const {
7868     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7869 
7870     // It is not an array section and therefore not a unity-size one.
7871     if (!OASE)
7872       return false;
7873 
7874     // An array section with no colon always refer to a single element.
7875     if (OASE->getColonLoc().isInvalid())
7876       return false;
7877 
7878     const Expr *Length = OASE->getLength();
7879 
7880     // If we don't have a length we have to check if the array has size 1
7881     // for this dimension. Also, we should always expect a length if the
7882     // base type is pointer.
7883     if (!Length) {
7884       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7885                              OASE->getBase()->IgnoreParenImpCasts())
7886                              .getCanonicalType();
7887       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7888         return ATy->getSize().getSExtValue() != 1;
7889       // If we don't have a constant dimension length, we have to consider
7890       // the current section as having any size, so it is not necessarily
7891       // unitary. If it happen to be unity size, that's user fault.
7892       return true;
7893     }
7894 
7895     // Check if the length evaluates to 1.
7896     Expr::EvalResult Result;
7897     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7898       return true; // Can have more that size 1.
7899 
7900     llvm::APSInt ConstLength = Result.Val.getInt();
7901     return ConstLength.getSExtValue() != 1;
7902   }
7903 
7904   /// Generate the base pointers, section pointers, sizes and map type
7905   /// bits for the provided map type, map modifier, and expression components.
7906   /// \a IsFirstComponent should be set to true if the provided set of
7907   /// components is the first associated with a capture.
7908   void generateInfoForComponentList(
7909       OpenMPMapClauseKind MapType,
7910       ArrayRef<OpenMPMapModifierKind> MapModifiers,
7911       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7912       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
7913       MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
7914       StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
7915       bool IsImplicit,
7916       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7917           OverlappedElements = llvm::None) const {
7918     // The following summarizes what has to be generated for each map and the
7919     // types below. The generated information is expressed in this order:
7920     // base pointer, section pointer, size, flags
7921     // (to add to the ones that come from the map type and modifier).
7922     //
7923     // double d;
7924     // int i[100];
7925     // float *p;
7926     //
7927     // struct S1 {
7928     //   int i;
7929     //   float f[50];
7930     // }
7931     // struct S2 {
7932     //   int i;
7933     //   float f[50];
7934     //   S1 s;
7935     //   double *p;
7936     //   struct S2 *ps;
7937     // }
7938     // S2 s;
7939     // S2 *ps;
7940     //
7941     // map(d)
7942     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7943     //
7944     // map(i)
7945     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7946     //
7947     // map(i[1:23])
7948     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7949     //
7950     // map(p)
7951     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7952     //
7953     // map(p[1:24])
7954     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7955     //
7956     // map(s)
7957     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7958     //
7959     // map(s.i)
7960     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7961     //
7962     // map(s.s.f)
7963     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7964     //
7965     // map(s.p)
7966     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7967     //
7968     // map(to: s.p[:22])
7969     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7970     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7971     // &(s.p), &(s.p[0]), 22*sizeof(double),
7972     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7973     // (*) alloc space for struct members, only this is a target parameter
7974     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7975     //      optimizes this entry out, same in the examples below)
7976     // (***) map the pointee (map: to)
7977     //
7978     // map(s.ps)
7979     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7980     //
7981     // map(from: s.ps->s.i)
7982     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7983     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7984     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7985     //
7986     // map(to: s.ps->ps)
7987     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7988     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7989     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7990     //
7991     // map(s.ps->ps->ps)
7992     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7993     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7994     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7995     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7996     //
7997     // map(to: s.ps->ps->s.f[:22])
7998     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7999     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
8000     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
8001     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
8002     //
8003     // map(ps)
8004     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
8005     //
8006     // map(ps->i)
8007     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
8008     //
8009     // map(ps->s.f)
8010     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
8011     //
8012     // map(from: ps->p)
8013     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
8014     //
8015     // map(to: ps->p[:22])
8016     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
8017     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
8018     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
8019     //
8020     // map(ps->ps)
8021     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
8022     //
8023     // map(from: ps->ps->s.i)
8024     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
8025     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
8026     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
8027     //
8028     // map(from: ps->ps->ps)
8029     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
8030     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
8031     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
8032     //
8033     // map(ps->ps->ps->ps)
8034     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
8035     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
8036     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
8037     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
8038     //
8039     // map(to: ps->ps->ps->s.f[:22])
8040     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
8041     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
8042     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
8043     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
8044     //
8045     // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (*), TARGET_PARAM
8048     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
8049     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
8050     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
8051     // (*) allocate contiguous space needed to fit all mapped members even if
8052     //     we allocate space for members not mapped (in this example,
8053     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
8054     //     them as well because they fall between &s.f[0] and &s.p)
8055     //
8056     // map(from: s.f[:22]) map(to: ps->p[:33])
8057     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
8059     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
8060     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
8061     // (*) the struct this entry pertains to is the 2nd element in the list of
8062     //     arguments, hence MEMBER_OF(2)
8063     //
8064     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
8065     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
8066     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
8067     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
8069     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
8070     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
8071     // (*) the struct this entry pertains to is the 4th element in the list
8072     //     of arguments, hence MEMBER_OF(4)
8073 
8074     // Track if the map information being generated is the first for a capture.
8075     bool IsCaptureFirstInfo = IsFirstComponentList;
8076     // When the variable is on a declare target link or in a to clause with
8077     // unified memory, a reference is needed to hold the host/device address
8078     // of the variable.
8079     bool RequiresReference = false;
8080 
8081     // Scan the components from the base to the complete expression.
8082     auto CI = Components.rbegin();
8083     auto CE = Components.rend();
8084     auto I = CI;
8085 
8086     // Track if the map information being generated is the first for a list of
8087     // components.
8088     bool IsExpressionFirstInfo = true;
8089     Address BP = Address::invalid();
8090     const Expr *AssocExpr = I->getAssociatedExpression();
8091     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
8092     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8093     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
8094 
8095     if (isa<MemberExpr>(AssocExpr)) {
8096       // The base is the 'this' pointer. The content of the pointer is going
8097       // to be the base of the field being mapped.
8098       BP = CGF.LoadCXXThisAddress();
8099     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
8100                (OASE &&
8101                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
8102       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
8103     } else if (OAShE &&
8104                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
8105       BP = Address(
8106           CGF.EmitScalarExpr(OAShE->getBase()),
8107           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
8108     } else {
8109       // The base is the reference to the variable.
8110       // BP = &Var.
8111       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
8112       if (const auto *VD =
8113               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
8114         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
8115                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
8116           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
8117               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
8118                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
8119             RequiresReference = true;
8120             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
8121           }
8122         }
8123       }
8124 
8125       // If the variable is a pointer and is being dereferenced (i.e. is not
8126       // the last component), the base has to be the pointer itself, not its
8127       // reference. References are ignored for mapping purposes.
8128       QualType Ty =
8129           I->getAssociatedDeclaration()->getType().getNonReferenceType();
8130       if (Ty->isAnyPointerType() && std::next(I) != CE) {
8131         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8132 
8133         // We do not need to generate individual map information for the
8134         // pointer, it can be associated with the combined storage.
8135         ++I;
8136       }
8137     }
8138 
8139     // Track whether a component of the list should be marked as MEMBER_OF some
8140     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
8141     // in a component list should be marked as MEMBER_OF, all subsequent entries
8142     // do not belong to the base struct. E.g.
8143     // struct S2 s;
8144     // s.ps->ps->ps->f[:]
8145     //   (1) (2) (3) (4)
8146     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
8147     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
8148     // is the pointee of ps(2) which is not member of struct s, so it should not
8149     // be marked as such (it is still PTR_AND_OBJ).
8150     // The variable is initialized to false so that PTR_AND_OBJ entries which
8151     // are not struct members are not considered (e.g. array of pointers to
8152     // data).
8153     bool ShouldBeMemberOf = false;
8154 
8155     // Variable keeping track of whether or not we have encountered a component
8156     // in the component list which is a member expression. Useful when we have a
8157     // pointer or a final array section, in which case it is the previous
8158     // component in the list which tells us whether we have a member expression.
8159     // E.g. X.f[:]
8160     // While processing the final array section "[:]" it is "f" which tells us
8161     // whether we are dealing with a member of a declared struct.
8162     const MemberExpr *EncounteredME = nullptr;
8163 
8164     for (; I != CE; ++I) {
8165       // If the current component is member of a struct (parent struct) mark it.
8166       if (!EncounteredME) {
8167         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
8168         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
8169         // as MEMBER_OF the parent struct.
8170         if (EncounteredME)
8171           ShouldBeMemberOf = true;
8172       }
8173 
8174       auto Next = std::next(I);
8175 
8176       // We need to generate the addresses and sizes if this is the last
8177       // component, if the component is a pointer or if it is an array section
8178       // whose length can't be proved to be one. If this is a pointer, it
8179       // becomes the base address for the following components.
8180 
8181       // A final array section, is one whose length can't be proved to be one.
8182       bool IsFinalArraySection =
8183           isFinalArraySectionExpression(I->getAssociatedExpression());
8184 
8185       // Get information on whether the element is a pointer. Have to do a
8186       // special treatment for array sections given that they are built-in
8187       // types.
8188       const auto *OASE =
8189           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
8190       const auto *OAShE =
8191           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
8192       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
8193       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
8194       bool IsPointer =
8195           OAShE ||
8196           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
8197                        .getCanonicalType()
8198                        ->isAnyPointerType()) ||
8199           I->getAssociatedExpression()->getType()->isAnyPointerType();
8200       bool IsNonDerefPointer = IsPointer && !UO && !BO;
8201 
8202       if (Next == CE || IsNonDerefPointer || IsFinalArraySection) {
8203         // If this is not the last component, we expect the pointer to be
8204         // associated with an array expression or member expression.
8205         assert((Next == CE ||
8206                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
8207                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
8208                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
8209                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
8210                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
8211                "Unexpected expression");
8212 
8213         Address LB = Address::invalid();
8214         if (OAShE) {
8215           LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
8216                        CGF.getContext().getTypeAlignInChars(
8217                            OAShE->getBase()->getType()));
8218         } else {
8219           LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
8220                    .getAddress(CGF);
8221         }
8222 
8223         // If this component is a pointer inside the base struct then we don't
8224         // need to create any entry for it - it will be combined with the object
8225         // it is pointing to into a single PTR_AND_OBJ entry.
8226         bool IsMemberPointer =
8227             IsPointer && EncounteredME &&
8228             (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
8229              EncounteredME);
8230         if (!OverlappedElements.empty()) {
8231           // Handle base element with the info for overlapped elements.
8232           assert(!PartialStruct.Base.isValid() && "The base element is set.");
8233           assert(Next == CE &&
8234                  "Expected last element for the overlapped elements.");
8235           assert(!IsPointer &&
8236                  "Unexpected base element with the pointer type.");
8237           // Mark the whole struct as the struct that requires allocation on the
8238           // device.
8239           PartialStruct.LowestElem = {0, LB};
8240           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
8241               I->getAssociatedExpression()->getType());
8242           Address HB = CGF.Builder.CreateConstGEP(
8243               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
8244                                                               CGF.VoidPtrTy),
8245               TypeSize.getQuantity() - 1);
8246           PartialStruct.HighestElem = {
8247               std::numeric_limits<decltype(
8248                   PartialStruct.HighestElem.first)>::max(),
8249               HB};
8250           PartialStruct.Base = BP;
8251           // Emit data for non-overlapped data.
8252           OpenMPOffloadMappingFlags Flags =
8253               OMP_MAP_MEMBER_OF |
8254               getMapTypeBits(MapType, MapModifiers, IsImplicit,
8255                              /*AddPtrFlag=*/false,
8256                              /*AddIsTargetParamFlag=*/false);
8257           LB = BP;
8258           llvm::Value *Size = nullptr;
8259           // Do bitcopy of all non-overlapped structure elements.
8260           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
8261                    Component : OverlappedElements) {
8262             Address ComponentLB = Address::invalid();
8263             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
8264                  Component) {
8265               if (MC.getAssociatedDeclaration()) {
8266                 ComponentLB =
8267                     CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
8268                         .getAddress(CGF);
8269                 Size = CGF.Builder.CreatePtrDiff(
8270                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
8271                     CGF.EmitCastToVoidPtr(LB.getPointer()));
8272                 break;
8273               }
8274             }
8275             BasePointers.push_back(BP.getPointer());
8276             Pointers.push_back(LB.getPointer());
8277             Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty,
8278                                                       /*isSigned=*/true));
8279             Types.push_back(Flags);
8280             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
8281           }
8282           BasePointers.push_back(BP.getPointer());
8283           Pointers.push_back(LB.getPointer());
8284           Size = CGF.Builder.CreatePtrDiff(
8285               CGF.EmitCastToVoidPtr(
8286                   CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
8287               CGF.EmitCastToVoidPtr(LB.getPointer()));
8288           Sizes.push_back(
8289               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8290           Types.push_back(Flags);
8291           break;
8292         }
8293         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
8294         if (!IsMemberPointer) {
8295           BasePointers.push_back(BP.getPointer());
8296           Pointers.push_back(LB.getPointer());
8297           Sizes.push_back(
8298               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8299 
8300           // We need to add a pointer flag for each map that comes from the
8301           // same expression except for the first one. We also need to signal
8302           // this map is the first one that relates with the current capture
8303           // (there is a set of entries for each capture).
8304           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
8305               MapType, MapModifiers, IsImplicit,
8306               !IsExpressionFirstInfo || RequiresReference,
8307               IsCaptureFirstInfo && !RequiresReference);
8308 
8309           if (!IsExpressionFirstInfo) {
8310             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
8311             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
8312             if (IsPointer)
8313               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
8314                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
8315 
8316             if (ShouldBeMemberOf) {
8317               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
8318               // should be later updated with the correct value of MEMBER_OF.
8319               Flags |= OMP_MAP_MEMBER_OF;
8320               // From now on, all subsequent PTR_AND_OBJ entries should not be
8321               // marked as MEMBER_OF.
8322               ShouldBeMemberOf = false;
8323             }
8324           }
8325 
8326           Types.push_back(Flags);
8327         }
8328 
8329         // If we have encountered a member expression so far, keep track of the
8330         // mapped member. If the parent is "*this", then the value declaration
8331         // is nullptr.
8332         if (EncounteredME) {
8333           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
8334           unsigned FieldIndex = FD->getFieldIndex();
8335 
8336           // Update info about the lowest and highest elements for this struct
8337           if (!PartialStruct.Base.isValid()) {
8338             PartialStruct.LowestElem = {FieldIndex, LB};
8339             PartialStruct.HighestElem = {FieldIndex, LB};
8340             PartialStruct.Base = BP;
8341           } else if (FieldIndex < PartialStruct.LowestElem.first) {
8342             PartialStruct.LowestElem = {FieldIndex, LB};
8343           } else if (FieldIndex > PartialStruct.HighestElem.first) {
8344             PartialStruct.HighestElem = {FieldIndex, LB};
8345           }
8346         }
8347 
8348         // If we have a final array section, we are done with this expression.
8349         if (IsFinalArraySection)
8350           break;
8351 
8352         // The pointer becomes the base for the next element.
8353         if (Next != CE)
8354           BP = LB;
8355 
8356         IsExpressionFirstInfo = false;
8357         IsCaptureFirstInfo = false;
8358       }
8359     }
8360   }
8361 
8362   /// Return the adjusted map modifiers if the declaration a capture refers to
8363   /// appears in a first-private clause. This is expected to be used only with
8364   /// directives that start with 'target'.
8365   MappableExprsHandler::OpenMPOffloadMappingFlags
8366   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8367     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8368 
8369     // A first private variable captured by reference will use only the
8370     // 'private ptr' and 'map to' flag. Return the right flags if the captured
8371     // declaration is known as first-private in this handler.
8372     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8373       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
8374           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
8375         return MappableExprsHandler::OMP_MAP_ALWAYS |
8376                MappableExprsHandler::OMP_MAP_TO;
8377       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8378         return MappableExprsHandler::OMP_MAP_TO |
8379                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8380       return MappableExprsHandler::OMP_MAP_PRIVATE |
8381              MappableExprsHandler::OMP_MAP_TO;
8382     }
8383     return MappableExprsHandler::OMP_MAP_TO |
8384            MappableExprsHandler::OMP_MAP_FROM;
8385   }
8386 
8387   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8388     // Rotate by getFlagMemberOffset() bits.
8389     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8390                                                   << getFlagMemberOffset());
8391   }
8392 
8393   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8394                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8395     // If the entry is PTR_AND_OBJ but has not been marked with the special
8396     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8397     // marked as MEMBER_OF.
8398     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8399         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8400       return;
8401 
8402     // Reset the placeholder value to prepare the flag for the assignment of the
8403     // proper MEMBER_OF value.
8404     Flags &= ~OMP_MAP_MEMBER_OF;
8405     Flags |= MemberOfFlag;
8406   }
8407 
8408   void getPlainLayout(const CXXRecordDecl *RD,
8409                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8410                       bool AsBase) const {
8411     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8412 
8413     llvm::StructType *St =
8414         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8415 
8416     unsigned NumElements = St->getNumElements();
8417     llvm::SmallVector<
8418         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8419         RecordLayout(NumElements);
8420 
8421     // Fill bases.
8422     for (const auto &I : RD->bases()) {
8423       if (I.isVirtual())
8424         continue;
8425       const auto *Base = I.getType()->getAsCXXRecordDecl();
8426       // Ignore empty bases.
8427       if (Base->isEmpty() || CGF.getContext()
8428                                  .getASTRecordLayout(Base)
8429                                  .getNonVirtualSize()
8430                                  .isZero())
8431         continue;
8432 
8433       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8434       RecordLayout[FieldIndex] = Base;
8435     }
8436     // Fill in virtual bases.
8437     for (const auto &I : RD->vbases()) {
8438       const auto *Base = I.getType()->getAsCXXRecordDecl();
8439       // Ignore empty bases.
8440       if (Base->isEmpty())
8441         continue;
8442       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8443       if (RecordLayout[FieldIndex])
8444         continue;
8445       RecordLayout[FieldIndex] = Base;
8446     }
8447     // Fill in all the fields.
8448     assert(!RD->isUnion() && "Unexpected union.");
8449     for (const auto *Field : RD->fields()) {
8450       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8451       // will fill in later.)
8452       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
8453         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8454         RecordLayout[FieldIndex] = Field;
8455       }
8456     }
8457     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8458              &Data : RecordLayout) {
8459       if (Data.isNull())
8460         continue;
8461       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
8462         getPlainLayout(Base, Layout, /*AsBase=*/true);
8463       else
8464         Layout.push_back(Data.get<const FieldDecl *>());
8465     }
8466   }
8467 
8468 public:
8469   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8470       : CurDir(&Dir), CGF(CGF) {
8471     // Extract firstprivate clause information.
8472     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8473       for (const auto *D : C->varlists())
8474         FirstPrivateDecls.try_emplace(
8475             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8476     // Extract device pointer clause information.
8477     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8478       for (auto L : C->component_lists())
8479         DevPointersMap[L.first].push_back(L.second);
8480   }
8481 
  /// Constructor for the 'declare mapper' directive. Only the directive and
  /// the code generation function are stored; unlike the constructor for
  /// executable directives, no clause information is collected here.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
8485 
8486   /// Generate code for the combined entry if we have a partially mapped struct
8487   /// and take care of the mapping flags of the arguments corresponding to
8488   /// individual struct members.
8489   void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
8490                          MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
8491                          MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
8492                          const StructRangeInfoTy &PartialStruct) const {
8493     // Base is the base of the struct
8494     BasePointers.push_back(PartialStruct.Base.getPointer());
8495     // Pointer is the address of the lowest element
8496     llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
8497     Pointers.push_back(LB);
8498     // Size is (addr of {highest+1} element) - (addr of lowest element)
8499     llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
8500     llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
8501     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8502     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8503     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
8504     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8505                                                   /*isSigned=*/false);
8506     Sizes.push_back(Size);
8507     // Map type is always TARGET_PARAM
8508     Types.push_back(OMP_MAP_TARGET_PARAM);
8509     // Remove TARGET_PARAM flag from the first element
8510     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
8511 
8512     // All other current entries will be MEMBER_OF the combined entry
8513     // (except for PTR_AND_OBJ entries which do not have a placeholder value
8514     // 0xFFFF in the MEMBER_OF field).
8515     OpenMPOffloadMappingFlags MemberOfFlag =
8516         getMemberOfFlag(BasePointers.size() - 1);
8517     for (auto &M : CurTypes)
8518       setCorrectMemberOfFlag(M, MemberOfFlag);
8519   }
8520 
8521   /// Generate all the base pointers, section pointers, sizes and map
8522   /// types for the extracted mappable expressions. Also, for each item that
8523   /// relates with a device pointer, a pair of the relevant declaration and
8524   /// index where it occurs is appended to the device pointers info array.
8525   void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
8526                        MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
8527                        MapFlagsArrayTy &Types) const {
8528     // We have to process the component lists that relate with the same
8529     // declaration in a single chunk so that we can generate the map flags
8530     // correctly. Therefore, we organize all lists in a map.
8531     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
8532 
8533     // Helper function to fill the information map for the different supported
8534     // clauses.
8535     auto &&InfoGen = [&Info](
8536         const ValueDecl *D,
8537         OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8538         OpenMPMapClauseKind MapType,
8539         ArrayRef<OpenMPMapModifierKind> MapModifiers,
8540         bool ReturnDevicePointer, bool IsImplicit) {
8541       const ValueDecl *VD =
8542           D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
8543       Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
8544                             IsImplicit);
8545     };
8546 
8547     assert(CurDir.is<const OMPExecutableDirective *>() &&
8548            "Expect a executable directive");
8549     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8550     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
8551       for (const auto L : C->component_lists()) {
8552         InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
8553             /*ReturnDevicePointer=*/false, C->isImplicit());
8554       }
8555     for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
8556       for (const auto L : C->component_lists()) {
8557         InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
8558             /*ReturnDevicePointer=*/false, C->isImplicit());
8559       }
8560     for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
8561       for (const auto L : C->component_lists()) {
8562         InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
8563             /*ReturnDevicePointer=*/false, C->isImplicit());
8564       }
8565 
8566     // Look at the use_device_ptr clause information and mark the existing map
8567     // entries as such. If there is no map information for an entry in the
8568     // use_device_ptr list, we create one with map type 'alloc' and zero size
8569     // section. It is the user fault if that was not mapped before. If there is
8570     // no map information and the pointer is a struct member, then we defer the
8571     // emission of that entry until the whole struct has been processed.
8572     llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
8573         DeferredInfo;
8574 
8575     for (const auto *C :
8576          CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
8577       for (const auto L : C->component_lists()) {
8578         assert(!L.second.empty() && "Not expecting empty list of components!");
8579         const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
8580         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8581         const Expr *IE = L.second.back().getAssociatedExpression();
8582         // If the first component is a member expression, we have to look into
8583         // 'this', which maps to null in the map of map information. Otherwise
8584         // look directly for the information.
8585         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8586 
8587         // We potentially have map information for this declaration already.
8588         // Look for the first set of components that refer to it.
8589         if (It != Info.end()) {
8590           auto CI = std::find_if(
8591               It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
8592                 return MI.Components.back().getAssociatedDeclaration() == VD;
8593               });
8594           // If we found a map entry, signal that the pointer has to be returned
8595           // and move on to the next declaration.
8596           if (CI != It->second.end()) {
8597             CI->ReturnDevicePointer = true;
8598             continue;
8599           }
8600         }
8601 
8602         // We didn't find any match in our map information - generate a zero
8603         // size array section - if the pointer is a struct member we defer this
8604         // action until the whole struct has been processed.
8605         if (isa<MemberExpr>(IE)) {
8606           // Insert the pointer into Info to be processed by
8607           // generateInfoForComponentList. Because it is a member pointer
8608           // without a pointee, no entry will be generated for it, therefore
8609           // we need to generate one after the whole struct has been processed.
8610           // Nonetheless, generateInfoForComponentList must be called to take
8611           // the pointer into account for the calculation of the range of the
8612           // partial struct.
8613           InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
8614                   /*ReturnDevicePointer=*/false, C->isImplicit());
8615           DeferredInfo[nullptr].emplace_back(IE, VD);
8616         } else {
8617           llvm::Value *Ptr =
8618               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8619           BasePointers.emplace_back(Ptr, VD);
8620           Pointers.push_back(Ptr);
8621           Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8622           Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
8623         }
8624       }
8625     }
8626 
8627     for (const auto &M : Info) {
8628       // We need to know when we generate information for the first component
8629       // associated with a capture, because the mapping flags depend on it.
8630       bool IsFirstComponentList = true;
8631 
8632       // Temporary versions of arrays
8633       MapBaseValuesArrayTy CurBasePointers;
8634       MapValuesArrayTy CurPointers;
8635       MapValuesArrayTy CurSizes;
8636       MapFlagsArrayTy CurTypes;
8637       StructRangeInfoTy PartialStruct;
8638 
8639       for (const MapInfo &L : M.second) {
8640         assert(!L.Components.empty() &&
8641                "Not expecting declaration with no component lists.");
8642 
8643         // Remember the current base pointer index.
8644         unsigned CurrentBasePointersIdx = CurBasePointers.size();
8645         generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
8646                                      CurBasePointers, CurPointers, CurSizes,
8647                                      CurTypes, PartialStruct,
8648                                      IsFirstComponentList, L.IsImplicit);
8649 
8650         // If this entry relates with a device pointer, set the relevant
8651         // declaration and add the 'return pointer' flag.
8652         if (L.ReturnDevicePointer) {
8653           assert(CurBasePointers.size() > CurrentBasePointersIdx &&
8654                  "Unexpected number of mapped base pointers.");
8655 
8656           const ValueDecl *RelevantVD =
8657               L.Components.back().getAssociatedDeclaration();
8658           assert(RelevantVD &&
8659                  "No relevant declaration related with device pointer??");
8660 
8661           CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
8662           CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8663         }
8664         IsFirstComponentList = false;
8665       }
8666 
8667       // Append any pending zero-length pointers which are struct members and
8668       // used with use_device_ptr.
8669       auto CI = DeferredInfo.find(M.first);
8670       if (CI != DeferredInfo.end()) {
8671         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8672           llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8673           llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
8674               this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
8675           CurBasePointers.emplace_back(BasePtr, L.VD);
8676           CurPointers.push_back(Ptr);
8677           CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
8678           // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
8679           // value MEMBER_OF=FFFF so that the entry is later updated with the
8680           // correct value of MEMBER_OF.
8681           CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8682                              OMP_MAP_MEMBER_OF);
8683         }
8684       }
8685 
8686       // If there is an entry in PartialStruct it means we have a struct with
8687       // individual members mapped. Emit an extra combined entry.
8688       if (PartialStruct.Base.isValid())
8689         emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8690                           PartialStruct);
8691 
8692       // We need to append the results of this capture to what we already have.
8693       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8694       Pointers.append(CurPointers.begin(), CurPointers.end());
8695       Sizes.append(CurSizes.begin(), CurSizes.end());
8696       Types.append(CurTypes.begin(), CurTypes.end());
8697     }
8698   }
8699 
8700   /// Generate all the base pointers, section pointers, sizes and map types for
8701   /// the extracted map clauses of user-defined mapper.
8702   void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers,
8703                                 MapValuesArrayTy &Pointers,
8704                                 MapValuesArrayTy &Sizes,
8705                                 MapFlagsArrayTy &Types) const {
8706     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8707            "Expect a declare mapper directive");
8708     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8709     // We have to process the component lists that relate with the same
8710     // declaration in a single chunk so that we can generate the map flags
8711     // correctly. Therefore, we organize all lists in a map.
8712     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
8713 
8714     // Helper function to fill the information map for the different supported
8715     // clauses.
8716     auto &&InfoGen = [&Info](
8717         const ValueDecl *D,
8718         OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8719         OpenMPMapClauseKind MapType,
8720         ArrayRef<OpenMPMapModifierKind> MapModifiers,
8721         bool ReturnDevicePointer, bool IsImplicit) {
8722       const ValueDecl *VD =
8723           D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
8724       Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
8725                             IsImplicit);
8726     };
8727 
8728     for (const auto *C : CurMapperDir->clauselists()) {
8729       const auto *MC = cast<OMPMapClause>(C);
8730       for (const auto L : MC->component_lists()) {
8731         InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(),
8732                 /*ReturnDevicePointer=*/false, MC->isImplicit());
8733       }
8734     }
8735 
8736     for (const auto &M : Info) {
8737       // We need to know when we generate information for the first component
8738       // associated with a capture, because the mapping flags depend on it.
8739       bool IsFirstComponentList = true;
8740 
8741       // Temporary versions of arrays
8742       MapBaseValuesArrayTy CurBasePointers;
8743       MapValuesArrayTy CurPointers;
8744       MapValuesArrayTy CurSizes;
8745       MapFlagsArrayTy CurTypes;
8746       StructRangeInfoTy PartialStruct;
8747 
8748       for (const MapInfo &L : M.second) {
8749         assert(!L.Components.empty() &&
8750                "Not expecting declaration with no component lists.");
8751         generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
8752                                      CurBasePointers, CurPointers, CurSizes,
8753                                      CurTypes, PartialStruct,
8754                                      IsFirstComponentList, L.IsImplicit);
8755         IsFirstComponentList = false;
8756       }
8757 
8758       // If there is an entry in PartialStruct it means we have a struct with
8759       // individual members mapped. Emit an extra combined entry.
8760       if (PartialStruct.Base.isValid())
8761         emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8762                           PartialStruct);
8763 
8764       // We need to append the results of this capture to what we already have.
8765       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8766       Pointers.append(CurPointers.begin(), CurPointers.end());
8767       Sizes.append(CurSizes.begin(), CurSizes.end());
8768       Types.append(CurTypes.begin(), CurTypes.end());
8769     }
8770   }
8771 
8772   /// Emit capture info for lambdas for variables captured by reference.
8773   void generateInfoForLambdaCaptures(
8774       const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
8775       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
8776       MapFlagsArrayTy &Types,
8777       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8778     const auto *RD = VD->getType()
8779                          .getCanonicalType()
8780                          .getNonReferenceType()
8781                          ->getAsCXXRecordDecl();
8782     if (!RD || !RD->isLambda())
8783       return;
8784     Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
8785     LValue VDLVal = CGF.MakeAddrLValue(
8786         VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
8787     llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
8788     FieldDecl *ThisCapture = nullptr;
8789     RD->getCaptureFields(Captures, ThisCapture);
8790     if (ThisCapture) {
8791       LValue ThisLVal =
8792           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8793       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8794       LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8795                                  VDLVal.getPointer(CGF));
8796       BasePointers.push_back(ThisLVal.getPointer(CGF));
8797       Pointers.push_back(ThisLValVal.getPointer(CGF));
8798       Sizes.push_back(
8799           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8800                                     CGF.Int64Ty, /*isSigned=*/true));
8801       Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8802                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8803     }
8804     for (const LambdaCapture &LC : RD->captures()) {
8805       if (!LC.capturesVariable())
8806         continue;
8807       const VarDecl *VD = LC.getCapturedVar();
8808       if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8809         continue;
8810       auto It = Captures.find(VD);
8811       assert(It != Captures.end() && "Found lambda capture without field.");
8812       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8813       if (LC.getCaptureKind() == LCK_ByRef) {
8814         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8815         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8816                                    VDLVal.getPointer(CGF));
8817         BasePointers.push_back(VarLVal.getPointer(CGF));
8818         Pointers.push_back(VarLValVal.getPointer(CGF));
8819         Sizes.push_back(CGF.Builder.CreateIntCast(
8820             CGF.getTypeSize(
8821                 VD->getType().getCanonicalType().getNonReferenceType()),
8822             CGF.Int64Ty, /*isSigned=*/true));
8823       } else {
8824         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8825         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8826                                    VDLVal.getPointer(CGF));
8827         BasePointers.push_back(VarLVal.getPointer(CGF));
8828         Pointers.push_back(VarRVal.getScalarVal());
8829         Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8830       }
8831       Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8832                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8833     }
8834   }
8835 
8836   /// Set correct indices for lambdas captures.
8837   void adjustMemberOfForLambdaCaptures(
8838       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8839       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8840       MapFlagsArrayTy &Types) const {
8841     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8842       // Set correct member_of idx for all implicit lambda captures.
8843       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8844                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8845         continue;
8846       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8847       assert(BasePtr && "Unable to find base lambda address.");
8848       int TgtIdx = -1;
8849       for (unsigned J = I; J > 0; --J) {
8850         unsigned Idx = J - 1;
8851         if (Pointers[Idx] != BasePtr)
8852           continue;
8853         TgtIdx = Idx;
8854         break;
8855       }
8856       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8857       // All other current entries will be MEMBER_OF the combined entry
8858       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8859       // 0xFFFF in the MEMBER_OF field).
8860       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8861       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8862     }
8863   }
8864 
8865   /// Generate the base pointers, section pointers, sizes and map types
8866   /// associated to a given capture.
8867   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
8868                               llvm::Value *Arg,
8869                               MapBaseValuesArrayTy &BasePointers,
8870                               MapValuesArrayTy &Pointers,
8871                               MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
8872                               StructRangeInfoTy &PartialStruct) const {
8873     assert(!Cap->capturesVariableArrayType() &&
8874            "Not expecting to generate map info for a variable array type!");
8875 
8876     // We need to know when we generating information for the first component
8877     const ValueDecl *VD = Cap->capturesThis()
8878                               ? nullptr
8879                               : Cap->getCapturedVar()->getCanonicalDecl();
8880 
8881     // If this declaration appears in a is_device_ptr clause we just have to
8882     // pass the pointer by value. If it is a reference to a declaration, we just
8883     // pass its value.
8884     if (DevPointersMap.count(VD)) {
8885       BasePointers.emplace_back(Arg, VD);
8886       Pointers.push_back(Arg);
8887       Sizes.push_back(
8888           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8889                                     CGF.Int64Ty, /*isSigned=*/true));
8890       Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
8891       return;
8892     }
8893 
8894     using MapData =
8895         std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
8896                    OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
8897     SmallVector<MapData, 4> DeclComponentLists;
8898     assert(CurDir.is<const OMPExecutableDirective *>() &&
8899            "Expect a executable directive");
8900     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8901     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8902       for (const auto L : C->decl_component_lists(VD)) {
8903         assert(L.first == VD &&
8904                "We got information for the wrong declaration??");
8905         assert(!L.second.empty() &&
8906                "Not expecting declaration with no component lists.");
8907         DeclComponentLists.emplace_back(L.second, C->getMapType(),
8908                                         C->getMapTypeModifiers(),
8909                                         C->isImplicit());
8910       }
8911     }
8912 
8913     // Find overlapping elements (including the offset from the base element).
8914     llvm::SmallDenseMap<
8915         const MapData *,
8916         llvm::SmallVector<
8917             OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
8918         4>
8919         OverlappedData;
8920     size_t Count = 0;
8921     for (const MapData &L : DeclComponentLists) {
8922       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8923       OpenMPMapClauseKind MapType;
8924       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8925       bool IsImplicit;
8926       std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
8927       ++Count;
8928       for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
8929         OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
8930         std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
8931         auto CI = Components.rbegin();
8932         auto CE = Components.rend();
8933         auto SI = Components1.rbegin();
8934         auto SE = Components1.rend();
8935         for (; CI != CE && SI != SE; ++CI, ++SI) {
8936           if (CI->getAssociatedExpression()->getStmtClass() !=
8937               SI->getAssociatedExpression()->getStmtClass())
8938             break;
8939           // Are we dealing with different variables/fields?
8940           if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
8941             break;
8942         }
8943         // Found overlapping if, at least for one component, reached the head of
8944         // the components list.
8945         if (CI == CE || SI == SE) {
8946           assert((CI != CE || SI != SE) &&
8947                  "Unexpected full match of the mapping components.");
8948           const MapData &BaseData = CI == CE ? L : L1;
8949           OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
8950               SI == SE ? Components : Components1;
8951           auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
8952           OverlappedElements.getSecond().push_back(SubData);
8953         }
8954       }
8955     }
8956     // Sort the overlapped elements for each item.
8957     llvm::SmallVector<const FieldDecl *, 4> Layout;
8958     if (!OverlappedData.empty()) {
8959       if (const auto *CRD =
8960               VD->getType().getCanonicalType()->getAsCXXRecordDecl())
8961         getPlainLayout(CRD, Layout, /*AsBase=*/false);
8962       else {
8963         const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
8964         Layout.append(RD->field_begin(), RD->field_end());
8965       }
8966     }
8967     for (auto &Pair : OverlappedData) {
8968       llvm::sort(
8969           Pair.getSecond(),
8970           [&Layout](
8971               OMPClauseMappableExprCommon::MappableExprComponentListRef First,
8972               OMPClauseMappableExprCommon::MappableExprComponentListRef
8973                   Second) {
8974             auto CI = First.rbegin();
8975             auto CE = First.rend();
8976             auto SI = Second.rbegin();
8977             auto SE = Second.rend();
8978             for (; CI != CE && SI != SE; ++CI, ++SI) {
8979               if (CI->getAssociatedExpression()->getStmtClass() !=
8980                   SI->getAssociatedExpression()->getStmtClass())
8981                 break;
8982               // Are we dealing with different variables/fields?
8983               if (CI->getAssociatedDeclaration() !=
8984                   SI->getAssociatedDeclaration())
8985                 break;
8986             }
8987 
8988             // Lists contain the same elements.
8989             if (CI == CE && SI == SE)
8990               return false;
8991 
8992             // List with less elements is less than list with more elements.
8993             if (CI == CE || SI == SE)
8994               return CI == CE;
8995 
8996             const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
8997             const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
8998             if (FD1->getParent() == FD2->getParent())
8999               return FD1->getFieldIndex() < FD2->getFieldIndex();
9000             const auto It =
9001                 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
9002                   return FD == FD1 || FD == FD2;
9003                 });
9004             return *It == FD1;
9005           });
9006     }
9007 
9008     // Associated with a capture, because the mapping flags depend on it.
9009     // Go through all of the elements with the overlapped elements.
9010     for (const auto &Pair : OverlappedData) {
9011       const MapData &L = *Pair.getFirst();
9012       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9013       OpenMPMapClauseKind MapType;
9014       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9015       bool IsImplicit;
9016       std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
9017       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
9018           OverlappedComponents = Pair.getSecond();
9019       bool IsFirstComponentList = true;
9020       generateInfoForComponentList(MapType, MapModifiers, Components,
9021                                    BasePointers, Pointers, Sizes, Types,
9022                                    PartialStruct, IsFirstComponentList,
9023                                    IsImplicit, OverlappedComponents);
9024     }
9025     // Go through other elements without overlapped elements.
9026     bool IsFirstComponentList = OverlappedData.empty();
9027     for (const MapData &L : DeclComponentLists) {
9028       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9029       OpenMPMapClauseKind MapType;
9030       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9031       bool IsImplicit;
9032       std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
9033       auto It = OverlappedData.find(&L);
9034       if (It == OverlappedData.end())
9035         generateInfoForComponentList(MapType, MapModifiers, Components,
9036                                      BasePointers, Pointers, Sizes, Types,
9037                                      PartialStruct, IsFirstComponentList,
9038                                      IsImplicit);
9039       IsFirstComponentList = false;
9040     }
9041   }
9042 
9043   /// Generate the base pointers, section pointers, sizes and map types
9044   /// associated with the declare target link variables.
9045   void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
9046                                         MapValuesArrayTy &Pointers,
9047                                         MapValuesArrayTy &Sizes,
9048                                         MapFlagsArrayTy &Types) const {
9049     assert(CurDir.is<const OMPExecutableDirective *>() &&
9050            "Expect a executable directive");
9051     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9052     // Map other list items in the map clause which are not captured variables
9053     // but "declare target link" global variables.
9054     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
9055       for (const auto L : C->component_lists()) {
9056         if (!L.first)
9057           continue;
9058         const auto *VD = dyn_cast<VarDecl>(L.first);
9059         if (!VD)
9060           continue;
9061         llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9062             OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9063         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
9064             !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
9065           continue;
9066         StructRangeInfoTy PartialStruct;
9067         generateInfoForComponentList(
9068             C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
9069             Pointers, Sizes, Types, PartialStruct,
9070             /*IsFirstComponentList=*/true, C->isImplicit());
9071         assert(!PartialStruct.Base.isValid() &&
9072                "No partial structs for declare target link expected.");
9073       }
9074     }
9075   }
9076 
9077   /// Generate the default map information for a given capture \a CI,
9078   /// record field declaration \a RI and captured value \a CV.
9079   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
9080                               const FieldDecl &RI, llvm::Value *CV,
9081                               MapBaseValuesArrayTy &CurBasePointers,
9082                               MapValuesArrayTy &CurPointers,
9083                               MapValuesArrayTy &CurSizes,
9084                               MapFlagsArrayTy &CurMapTypes) const {
9085     bool IsImplicit = true;
9086     // Do the default mapping.
9087     if (CI.capturesThis()) {
9088       CurBasePointers.push_back(CV);
9089       CurPointers.push_back(CV);
9090       const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
9091       CurSizes.push_back(
9092           CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
9093                                     CGF.Int64Ty, /*isSigned=*/true));
9094       // Default map type.
9095       CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
9096     } else if (CI.capturesVariableByCopy()) {
9097       CurBasePointers.push_back(CV);
9098       CurPointers.push_back(CV);
9099       if (!RI.getType()->isAnyPointerType()) {
9100         // We have to signal to the runtime captures passed by value that are
9101         // not pointers.
9102         CurMapTypes.push_back(OMP_MAP_LITERAL);
9103         CurSizes.push_back(CGF.Builder.CreateIntCast(
9104             CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
9105       } else {
9106         // Pointers are implicitly mapped with a zero size and no flags
9107         // (other than first map that is added for all implicit maps).
9108         CurMapTypes.push_back(OMP_MAP_NONE);
9109         CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
9110       }
9111       const VarDecl *VD = CI.getCapturedVar();
9112       auto I = FirstPrivateDecls.find(VD);
9113       if (I != FirstPrivateDecls.end())
9114         IsImplicit = I->getSecond();
9115     } else {
9116       assert(CI.capturesVariable() && "Expected captured reference.");
9117       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
9118       QualType ElementType = PtrTy->getPointeeType();
9119       CurSizes.push_back(CGF.Builder.CreateIntCast(
9120           CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
9121       // The default map type for a scalar/complex type is 'to' because by
9122       // default the value doesn't have to be retrieved. For an aggregate
9123       // type, the default is 'tofrom'.
9124       CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
9125       const VarDecl *VD = CI.getCapturedVar();
9126       auto I = FirstPrivateDecls.find(VD);
9127       if (I != FirstPrivateDecls.end() &&
9128           VD->getType().isConstant(CGF.getContext())) {
9129         llvm::Constant *Addr =
9130             CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
9131         // Copy the value of the original variable to the new global copy.
9132         CGF.Builder.CreateMemCpy(
9133             CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
9134             Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
9135             CurSizes.back(), /*IsVolatile=*/false);
9136         // Use new global variable as the base pointers.
9137         CurBasePointers.push_back(Addr);
9138         CurPointers.push_back(Addr);
9139       } else {
9140         CurBasePointers.push_back(CV);
9141         if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
9142           Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
9143               CV, ElementType, CGF.getContext().getDeclAlign(VD),
9144               AlignmentSource::Decl));
9145           CurPointers.push_back(PtrAddr.getPointer());
9146         } else {
9147           CurPointers.push_back(CV);
9148         }
9149       }
9150       if (I != FirstPrivateDecls.end())
9151         IsImplicit = I->getSecond();
9152     }
9153     // Every default map produces a single argument which is a target parameter.
9154     CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;
9155 
9156     // Add flag stating this is an implicit map.
9157     if (IsImplicit)
9158       CurMapTypes.back() |= OMP_MAP_IMPLICIT;
9159   }
9160 };
9161 } // anonymous namespace
9162 
9163 /// Emit the arrays used to pass the captures and map information to the
9164 /// offloading runtime library. If there is no map or capture information,
9165 /// return nullptr by reference.
9166 static void
9167 emitOffloadingArrays(CodeGenFunction &CGF,
9168                      MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
9169                      MappableExprsHandler::MapValuesArrayTy &Pointers,
9170                      MappableExprsHandler::MapValuesArrayTy &Sizes,
9171                      MappableExprsHandler::MapFlagsArrayTy &MapTypes,
9172                      CGOpenMPRuntime::TargetDataInfo &Info) {
9173   CodeGenModule &CGM = CGF.CGM;
9174   ASTContext &Ctx = CGF.getContext();
9175 
9176   // Reset the array information.
9177   Info.clearArrayInfo();
9178   Info.NumberOfPtrs = BasePointers.size();
9179 
9180   if (Info.NumberOfPtrs) {
9181     // Detect if we have any capture size requiring runtime evaluation of the
9182     // size so that a constant array could be eventually used.
9183     bool hasRuntimeEvaluationCaptureSize = false;
9184     for (llvm::Value *S : Sizes)
9185       if (!isa<llvm::Constant>(S)) {
9186         hasRuntimeEvaluationCaptureSize = true;
9187         break;
9188       }
9189 
9190     llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
9191     QualType PointerArrayType = Ctx.getConstantArrayType(
9192         Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
9193         /*IndexTypeQuals=*/0);
9194 
9195     Info.BasePointersArray =
9196         CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
9197     Info.PointersArray =
9198         CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
9199 
9200     // If we don't have any VLA types or other types that require runtime
9201     // evaluation, we can use a constant array for the map sizes, otherwise we
9202     // need to fill up the arrays as we do for the pointers.
9203     QualType Int64Ty =
9204         Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
9205     if (hasRuntimeEvaluationCaptureSize) {
9206       QualType SizeArrayType = Ctx.getConstantArrayType(
9207           Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
9208           /*IndexTypeQuals=*/0);
9209       Info.SizesArray =
9210           CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
9211     } else {
9212       // We expect all the sizes to be constant, so we collect them to create
9213       // a constant array.
9214       SmallVector<llvm::Constant *, 16> ConstSizes;
9215       for (llvm::Value *S : Sizes)
9216         ConstSizes.push_back(cast<llvm::Constant>(S));
9217 
9218       auto *SizesArrayInit = llvm::ConstantArray::get(
9219           llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
9220       std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
9221       auto *SizesArrayGbl = new llvm::GlobalVariable(
9222           CGM.getModule(), SizesArrayInit->getType(),
9223           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
9224           SizesArrayInit, Name);
9225       SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
9226       Info.SizesArray = SizesArrayGbl;
9227     }
9228 
9229     // The map types are always constant so we don't need to generate code to
9230     // fill arrays. Instead, we create an array constant.
9231     SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
9232     llvm::copy(MapTypes, Mapping.begin());
9233     llvm::Constant *MapTypesArrayInit =
9234         llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
9235     std::string MaptypesName =
9236         CGM.getOpenMPRuntime().getName({"offload_maptypes"});
9237     auto *MapTypesArrayGbl = new llvm::GlobalVariable(
9238         CGM.getModule(), MapTypesArrayInit->getType(),
9239         /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
9240         MapTypesArrayInit, MaptypesName);
9241     MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
9242     Info.MapTypesArray = MapTypesArrayGbl;
9243 
9244     for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
9245       llvm::Value *BPVal = *BasePointers[I];
9246       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
9247           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9248           Info.BasePointersArray, 0, I);
9249       BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9250           BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
9251       Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9252       CGF.Builder.CreateStore(BPVal, BPAddr);
9253 
9254       if (Info.requiresDevicePointerInfo())
9255         if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
9256           Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
9257 
9258       llvm::Value *PVal = Pointers[I];
9259       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9260           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9261           Info.PointersArray, 0, I);
9262       P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9263           P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
9264       Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9265       CGF.Builder.CreateStore(PVal, PAddr);
9266 
9267       if (hasRuntimeEvaluationCaptureSize) {
9268         llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
9269             llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9270             Info.SizesArray,
9271             /*Idx0=*/0,
9272             /*Idx1=*/I);
9273         Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
9274         CGF.Builder.CreateStore(
9275             CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true),
9276             SAddr);
9277       }
9278     }
9279   }
9280 }
9281 
9282 /// Emit the arguments to be passed to the runtime library based on the
9283 /// arrays of pointers, sizes and map types.
9284 static void emitOffloadingArraysArgument(
9285     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
9286     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
9287     llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
9288   CodeGenModule &CGM = CGF.CGM;
9289   if (Info.NumberOfPtrs) {
9290     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9291         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9292         Info.BasePointersArray,
9293         /*Idx0=*/0, /*Idx1=*/0);
9294     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9295         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9296         Info.PointersArray,
9297         /*Idx0=*/0,
9298         /*Idx1=*/0);
9299     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9300         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
9301         /*Idx0=*/0, /*Idx1=*/0);
9302     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9303         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9304         Info.MapTypesArray,
9305         /*Idx0=*/0,
9306         /*Idx1=*/0);
9307   } else {
9308     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9309     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9310     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9311     MapTypesArrayArg =
9312         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9313   }
9314 }
9315 
9316 /// Check for inner distribute directive.
9317 static const OMPExecutableDirective *
9318 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
9319   const auto *CS = D.getInnermostCapturedStmt();
9320   const auto *Body =
9321       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
9322   const Stmt *ChildStmt =
9323       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9324 
9325   if (const auto *NestedDir =
9326           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9327     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
9328     switch (D.getDirectiveKind()) {
9329     case OMPD_target:
9330       if (isOpenMPDistributeDirective(DKind))
9331         return NestedDir;
9332       if (DKind == OMPD_teams) {
9333         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
9334             /*IgnoreCaptured=*/true);
9335         if (!Body)
9336           return nullptr;
9337         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9338         if (const auto *NND =
9339                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9340           DKind = NND->getDirectiveKind();
9341           if (isOpenMPDistributeDirective(DKind))
9342             return NND;
9343         }
9344       }
9345       return nullptr;
9346     case OMPD_target_teams:
9347       if (isOpenMPDistributeDirective(DKind))
9348         return NestedDir;
9349       return nullptr;
9350     case OMPD_target_parallel:
9351     case OMPD_target_simd:
9352     case OMPD_target_parallel_for:
9353     case OMPD_target_parallel_for_simd:
9354       return nullptr;
9355     case OMPD_target_teams_distribute:
9356     case OMPD_target_teams_distribute_simd:
9357     case OMPD_target_teams_distribute_parallel_for:
9358     case OMPD_target_teams_distribute_parallel_for_simd:
9359     case OMPD_parallel:
9360     case OMPD_for:
9361     case OMPD_parallel_for:
9362     case OMPD_parallel_master:
9363     case OMPD_parallel_sections:
9364     case OMPD_for_simd:
9365     case OMPD_parallel_for_simd:
9366     case OMPD_cancel:
9367     case OMPD_cancellation_point:
9368     case OMPD_ordered:
9369     case OMPD_threadprivate:
9370     case OMPD_allocate:
9371     case OMPD_task:
9372     case OMPD_simd:
9373     case OMPD_sections:
9374     case OMPD_section:
9375     case OMPD_single:
9376     case OMPD_master:
9377     case OMPD_critical:
9378     case OMPD_taskyield:
9379     case OMPD_barrier:
9380     case OMPD_taskwait:
9381     case OMPD_taskgroup:
9382     case OMPD_atomic:
9383     case OMPD_flush:
9384     case OMPD_depobj:
9385     case OMPD_scan:
9386     case OMPD_teams:
9387     case OMPD_target_data:
9388     case OMPD_target_exit_data:
9389     case OMPD_target_enter_data:
9390     case OMPD_distribute:
9391     case OMPD_distribute_simd:
9392     case OMPD_distribute_parallel_for:
9393     case OMPD_distribute_parallel_for_simd:
9394     case OMPD_teams_distribute:
9395     case OMPD_teams_distribute_simd:
9396     case OMPD_teams_distribute_parallel_for:
9397     case OMPD_teams_distribute_parallel_for_simd:
9398     case OMPD_target_update:
9399     case OMPD_declare_simd:
9400     case OMPD_declare_variant:
9401     case OMPD_begin_declare_variant:
9402     case OMPD_end_declare_variant:
9403     case OMPD_declare_target:
9404     case OMPD_end_declare_target:
9405     case OMPD_declare_reduction:
9406     case OMPD_declare_mapper:
9407     case OMPD_taskloop:
9408     case OMPD_taskloop_simd:
9409     case OMPD_master_taskloop:
9410     case OMPD_master_taskloop_simd:
9411     case OMPD_parallel_master_taskloop:
9412     case OMPD_parallel_master_taskloop_simd:
9413     case OMPD_requires:
9414     case OMPD_unknown:
9415       llvm_unreachable("Unexpected directive.");
9416     }
9417   }
9418 
9419   return nullptr;
9420 }
9421 
9422 /// Emit the user-defined mapper function. The code generation follows the
9423 /// pattern in the example below.
9424 /// \code
9425 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9426 ///                                           void *base, void *begin,
9427 ///                                           int64_t size, int64_t type) {
9428 ///   // Allocate space for an array section first.
9429 ///   if (size > 1 && !maptype.IsDelete)
9430 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9431 ///                                 size*sizeof(Ty), clearToFrom(type));
9432 ///   // Map members.
9433 ///   for (unsigned i = 0; i < size; i++) {
9434 ///     // For each component specified by this mapper:
9435 ///     for (auto c : all_components) {
9436 ///       if (c.hasMapper())
9437 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9438 ///                       c.arg_type);
9439 ///       else
9440 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9441 ///                                     c.arg_begin, c.arg_size, c.arg_type);
9442 ///     }
9443 ///   }
9444 ///   // Delete the array section.
9445 ///   if (size > 1 && maptype.IsDelete)
9446 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9447 ///                                 size*sizeof(Ty), clearToFrom(type));
9448 /// }
9449 /// \endcode
9450 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9451                                             CodeGenFunction *CGF) {
9452   if (UDMMap.count(D) > 0)
9453     return;
9454   ASTContext &C = CGM.getContext();
9455   QualType Ty = D->getType();
9456   QualType PtrTy = C.getPointerType(Ty).withRestrict();
9457   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
9458   auto *MapperVarDecl =
9459       cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9460   SourceLocation Loc = D->getLocation();
9461   CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9462 
9463   // Prepare mapper function arguments and attributes.
9464   ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9465                               C.VoidPtrTy, ImplicitParamDecl::Other);
9466   ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9467                             ImplicitParamDecl::Other);
9468   ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9469                              C.VoidPtrTy, ImplicitParamDecl::Other);
9470   ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9471                             ImplicitParamDecl::Other);
9472   ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9473                             ImplicitParamDecl::Other);
9474   FunctionArgList Args;
9475   Args.push_back(&HandleArg);
9476   Args.push_back(&BaseArg);
9477   Args.push_back(&BeginArg);
9478   Args.push_back(&SizeArg);
9479   Args.push_back(&TypeArg);
9480   const CGFunctionInfo &FnInfo =
9481       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
9482   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
9483   SmallString<64> TyStr;
9484   llvm::raw_svector_ostream Out(TyStr);
9485   CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
9486   std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9487   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
9488                                     Name, &CGM.getModule());
9489   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
9490   Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
9491   // Start the mapper function code generation.
9492   CodeGenFunction MapperCGF(CGM);
9493   MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
9494   // Compute the starting and end addreses of array elements.
9495   llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
9496       MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
9497       C.getPointerType(Int64Ty), Loc);
9498   llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
9499       MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
9500       CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
9501   llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
9502   llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
9503       MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
9504       C.getPointerType(Int64Ty), Loc);
9505   // Prepare common arguments for array initiation and deletion.
9506   llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
9507       MapperCGF.GetAddrOfLocalVar(&HandleArg),
9508       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9509   llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
9510       MapperCGF.GetAddrOfLocalVar(&BaseArg),
9511       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9512   llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
9513       MapperCGF.GetAddrOfLocalVar(&BeginArg),
9514       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9515 
9516   // Emit array initiation if this is an array section and \p MapType indicates
9517   // that memory allocation is required.
9518   llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
9519   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9520                              ElementSize, HeadBB, /*IsInit=*/true);
9521 
9522   // Emit a for loop to iterate through SizeArg of elements and map all of them.
9523 
9524   // Emit the loop header block.
9525   MapperCGF.EmitBlock(HeadBB);
9526   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
9527   llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
9528   // Evaluate whether the initial condition is satisfied.
9529   llvm::Value *IsEmpty =
9530       MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
9531   MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9532   llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
9533 
9534   // Emit the loop body block.
9535   MapperCGF.EmitBlock(BodyBB);
9536   llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
9537       PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
9538   PtrPHI->addIncoming(PtrBegin, EntryBB);
9539   Address PtrCurrent =
9540       Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
9541                           .getAlignment()
9542                           .alignmentOfArrayElement(ElementSize));
9543   // Privatize the declared variable of mapper to be the current array element.
9544   CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9545   Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
9546     return MapperCGF
9547         .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
9548         .getAddress(MapperCGF);
9549   });
9550   (void)Scope.Privatize();
9551 
9552   // Get map clause information. Fill up the arrays with all mapped variables.
9553   MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
9554   MappableExprsHandler::MapValuesArrayTy Pointers;
9555   MappableExprsHandler::MapValuesArrayTy Sizes;
9556   MappableExprsHandler::MapFlagsArrayTy MapTypes;
9557   MappableExprsHandler MEHandler(*D, MapperCGF);
9558   MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes);
9559 
9560   // Call the runtime API __tgt_mapper_num_components to get the number of
9561   // pre-existing components.
9562   llvm::Value *OffloadingArgs[] = {Handle};
9563   llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
9564       createRuntimeFunction(OMPRTL__tgt_mapper_num_components), OffloadingArgs);
9565   llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
9566       PreviousSize,
9567       MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
9568 
9569   // Fill up the runtime mapper handle for all components.
9570   for (unsigned I = 0; I < BasePointers.size(); ++I) {
9571     llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
9572         *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9573     llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
9574         Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9575     llvm::Value *CurSizeArg = Sizes[I];
9576 
9577     // Extract the MEMBER_OF field from the map type.
9578     llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
9579     MapperCGF.EmitBlock(MemberBB);
9580     llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]);
9581     llvm::Value *Member = MapperCGF.Builder.CreateAnd(
9582         OriMapType,
9583         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
9584     llvm::BasicBlock *MemberCombineBB =
9585         MapperCGF.createBasicBlock("omp.member.combine");
9586     llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
9587     llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
9588     MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
9589     // Add the number of pre-existing components to the MEMBER_OF field if it
9590     // is valid.
9591     MapperCGF.EmitBlock(MemberCombineBB);
9592     llvm::Value *CombinedMember =
9593         MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9594     // Do nothing if it is not a member of previous components.
9595     MapperCGF.EmitBlock(TypeBB);
9596     llvm::PHINode *MemberMapType =
9597         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
9598     MemberMapType->addIncoming(OriMapType, MemberBB);
9599     MemberMapType->addIncoming(CombinedMember, MemberCombineBB);
9600 
9601     // Combine the map type inherited from user-defined mapper with that
9602     // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
9603     // bits of the \a MapType, which is the input argument of the mapper
9604     // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
9605     // bits of MemberMapType.
9606     // [OpenMP 5.0], 1.2.6. map-type decay.
9607     //        | alloc |  to   | from  | tofrom | release | delete
9608     // ----------------------------------------------------------
9609     // alloc  | alloc | alloc | alloc | alloc  | release | delete
9610     // to     | alloc |  to   | alloc |   to   | release | delete
9611     // from   | alloc | alloc | from  |  from  | release | delete
9612     // tofrom | alloc |  to   | from  | tofrom | release | delete
9613     llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
9614         MapType,
9615         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
9616                                    MappableExprsHandler::OMP_MAP_FROM));
9617     llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
9618     llvm::BasicBlock *AllocElseBB =
9619         MapperCGF.createBasicBlock("omp.type.alloc.else");
9620     llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
9621     llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
9622     llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
9623     llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
9624     llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
9625     MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9626     // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
9627     MapperCGF.EmitBlock(AllocBB);
9628     llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
9629         MemberMapType,
9630         MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9631                                      MappableExprsHandler::OMP_MAP_FROM)));
9632     MapperCGF.Builder.CreateBr(EndBB);
9633     MapperCGF.EmitBlock(AllocElseBB);
9634     llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
9635         LeftToFrom,
9636         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
9637     MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9638     // In case of to, clear OMP_MAP_FROM.
9639     MapperCGF.EmitBlock(ToBB);
9640     llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
9641         MemberMapType,
9642         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
9643     MapperCGF.Builder.CreateBr(EndBB);
9644     MapperCGF.EmitBlock(ToElseBB);
9645     llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
9646         LeftToFrom,
9647         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
9648     MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9649     // In case of from, clear OMP_MAP_TO.
9650     MapperCGF.EmitBlock(FromBB);
9651     llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
9652         MemberMapType,
9653         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
9654     // In case of tofrom, do nothing.
9655     MapperCGF.EmitBlock(EndBB);
9656     llvm::PHINode *CurMapType =
9657         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
9658     CurMapType->addIncoming(AllocMapType, AllocBB);
9659     CurMapType->addIncoming(ToMapType, ToBB);
9660     CurMapType->addIncoming(FromMapType, FromBB);
9661     CurMapType->addIncoming(MemberMapType, ToElseBB);
9662 
9663     // TODO: call the corresponding mapper function if a user-defined mapper is
9664     // associated with this map clause.
9665     // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9666     // data structure.
9667     llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
9668                                      CurSizeArg, CurMapType};
9669     MapperCGF.EmitRuntimeCall(
9670         createRuntimeFunction(OMPRTL__tgt_push_mapper_component),
9671         OffloadingArgs);
9672   }
9673 
9674   // Update the pointer to point to the next element that needs to be mapped,
9675   // and check whether we have mapped all elements.
9676   llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
9677       PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
9678   PtrPHI->addIncoming(PtrNext, BodyBB);
9679   llvm::Value *IsDone =
9680       MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9681   llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
9682   MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9683 
9684   MapperCGF.EmitBlock(ExitBB);
9685   // Emit array deletion if this is an array section and \p MapType indicates
9686   // that deletion is required.
9687   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9688                              ElementSize, DoneBB, /*IsInit=*/false);
9689 
9690   // Emit the function exit block.
9691   MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9692   MapperCGF.FinishFunction();
9693   UDMMap.try_emplace(D, Fn);
9694   if (CGF) {
9695     auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
9696     Decls.second.push_back(D);
9697   }
9698 }
9699 
9700 /// Emit the array initialization or deletion portion for user-defined mapper
9701 /// code generation. First, it evaluates whether an array section is mapped and
9702 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9703 /// true, and \a MapType indicates to not delete this array, array
9704 /// initialization code is generated. If \a IsInit is false, and \a MapType
9705 /// indicates to not this array, array deletion code is generated.
9706 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9707     CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9708     llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9709     CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
9710   StringRef Prefix = IsInit ? ".init" : ".del";
9711 
9712   // Evaluate if this is an array section.
9713   llvm::BasicBlock *IsDeleteBB =
9714       MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"}));
9715   llvm::BasicBlock *BodyBB =
9716       MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
9717   llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
9718       Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9719   MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);
9720 
9721   // Evaluate if we are going to delete this section.
9722   MapperCGF.EmitBlock(IsDeleteBB);
9723   llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9724       MapType,
9725       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
9726   llvm::Value *DeleteCond;
9727   if (IsInit) {
9728     DeleteCond = MapperCGF.Builder.CreateIsNull(
9729         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9730   } else {
9731     DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9732         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9733   }
9734   MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);
9735 
9736   MapperCGF.EmitBlock(BodyBB);
9737   // Get the array size by multiplying element size and element number (i.e., \p
9738   // Size).
9739   llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9740       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9741   // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
9742   // memory allocation/deletion purpose only.
9743   llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
9744       MapType,
9745       MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9746                                    MappableExprsHandler::OMP_MAP_FROM)));
9747   // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9748   // data structure.
9749   llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
9750   MapperCGF.EmitRuntimeCall(
9751       createRuntimeFunction(OMPRTL__tgt_push_mapper_component), OffloadingArgs);
9752 }
9753 
9754 void CGOpenMPRuntime::emitTargetNumIterationsCall(
9755     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9756     llvm::Value *DeviceID,
9757     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9758                                      const OMPLoopDirective &D)>
9759         SizeEmitter) {
9760   OpenMPDirectiveKind Kind = D.getDirectiveKind();
9761   const OMPExecutableDirective *TD = &D;
9762   // Get nested teams distribute kind directive, if any.
9763   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
9764     TD = getNestedDistributeDirective(CGM.getContext(), D);
9765   if (!TD)
9766     return;
9767   const auto *LD = cast<OMPLoopDirective>(TD);
9768   auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF,
9769                                                      PrePostActionTy &) {
9770     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
9771       llvm::Value *Args[] = {DeviceID, NumIterations};
9772       CGF.EmitRuntimeCall(
9773           createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
9774     }
9775   };
9776   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
9777 }
9778 
9779 void CGOpenMPRuntime::emitTargetCall(
9780     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9781     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
9782     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9783     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9784                                      const OMPLoopDirective &D)>
9785         SizeEmitter) {
9786   if (!CGF.HaveInsertPoint())
9787     return;
9788 
9789   assert(OutlinedFn && "Invalid outlined function!");
9790 
9791   const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
9792   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
9793   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
9794   auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
9795                                             PrePostActionTy &) {
9796     CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9797   };
9798   emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
9799 
9800   CodeGenFunction::OMPTargetDataInfo InputInfo;
9801   llvm::Value *MapTypesArray = nullptr;
9802   // Fill up the pointer arrays and transfer execution to the device.
9803   auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
9804                     &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
9805                     SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
9806     if (Device.getInt() == OMPC_DEVICE_ancestor) {
9807       // Reverse offloading is not supported, so just execute on the host.
9808       if (RequiresOuterTask) {
9809         CapturedVars.clear();
9810         CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9811       }
9812       emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
9813       return;
9814     }
9815 
9816     // On top of the arrays that were filled up, the target offloading call
9817     // takes as arguments the device id as well as the host pointer. The host
9818     // pointer is used by the runtime library to identify the current target
9819     // region, so it only has to be unique and not necessarily point to
9820     // anything. It could be the pointer to the outlined function that
9821     // implements the target region, but we aren't using that so that the
9822     // compiler doesn't need to keep that, and could therefore inline the host
9823     // function if proven worthwhile during optimization.
9824 
9825     // From this point on, we need to have an ID of the target region defined.
9826     assert(OutlinedFnID && "Invalid outlined function ID!");
9827 
9828     // Emit device ID if any.
9829     llvm::Value *DeviceID;
9830     if (Device.getPointer()) {
9831       assert((Device.getInt() == OMPC_DEVICE_unknown ||
9832               Device.getInt() == OMPC_DEVICE_device_num) &&
9833              "Expected device_num modifier.");
9834       llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
9835       DeviceID =
9836           CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
9837     } else {
9838       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9839     }
9840 
9841     // Emit the number of elements in the offloading arrays.
9842     llvm::Value *PointerNum =
9843         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
9844 
9845     // Return value of the runtime offloading call.
9846     llvm::Value *Return;
9847 
9848     llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
9849     llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
9850 
9851     // Emit tripcount for the target loop-based directive.
9852     emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
9853 
9854     bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
9855     // The target region is an outlined function launched by the runtime
9856     // via calls __tgt_target() or __tgt_target_teams().
9857     //
9858     // __tgt_target() launches a target region with one team and one thread,
9859     // executing a serial region.  This master thread may in turn launch
9860     // more threads within its team upon encountering a parallel region,
9861     // however, no additional teams can be launched on the device.
9862     //
9863     // __tgt_target_teams() launches a target region with one or more teams,
9864     // each with one or more threads.  This call is required for target
9865     // constructs such as:
9866     //  'target teams'
9867     //  'target' / 'teams'
9868     //  'target teams distribute parallel for'
9869     //  'target parallel'
9870     // and so on.
9871     //
9872     // Note that on the host and CPU targets, the runtime implementation of
9873     // these calls simply call the outlined function without forking threads.
9874     // The outlined functions themselves have runtime calls to
9875     // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
9876     // the compiler in emitTeamsCall() and emitParallelCall().
9877     //
9878     // In contrast, on the NVPTX target, the implementation of
9879     // __tgt_target_teams() launches a GPU kernel with the requested number
9880     // of teams and threads so no additional calls to the runtime are required.
9881     if (NumTeams) {
9882       // If we have NumTeams defined this means that we have an enclosed teams
9883       // region. Therefore we also expect to have NumThreads defined. These two
9884       // values should be defined in the presence of a teams directive,
9885       // regardless of having any clauses associated. If the user is using teams
9886       // but no clauses, these two values will be the default that should be
9887       // passed to the runtime library - a 32-bit integer with the value zero.
9888       assert(NumThreads && "Thread limit expression should be available along "
9889                            "with number of teams.");
9890       llvm::Value *OffloadingArgs[] = {DeviceID,
9891                                        OutlinedFnID,
9892                                        PointerNum,
9893                                        InputInfo.BasePointersArray.getPointer(),
9894                                        InputInfo.PointersArray.getPointer(),
9895                                        InputInfo.SizesArray.getPointer(),
9896                                        MapTypesArray,
9897                                        NumTeams,
9898                                        NumThreads};
9899       Return = CGF.EmitRuntimeCall(
9900           createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
9901                                           : OMPRTL__tgt_target_teams),
9902           OffloadingArgs);
9903     } else {
9904       llvm::Value *OffloadingArgs[] = {DeviceID,
9905                                        OutlinedFnID,
9906                                        PointerNum,
9907                                        InputInfo.BasePointersArray.getPointer(),
9908                                        InputInfo.PointersArray.getPointer(),
9909                                        InputInfo.SizesArray.getPointer(),
9910                                        MapTypesArray};
9911       Return = CGF.EmitRuntimeCall(
9912           createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
9913                                           : OMPRTL__tgt_target),
9914           OffloadingArgs);
9915     }
9916 
9917     // Check the error code and execute the host version if required.
9918     llvm::BasicBlock *OffloadFailedBlock =
9919         CGF.createBasicBlock("omp_offload.failed");
9920     llvm::BasicBlock *OffloadContBlock =
9921         CGF.createBasicBlock("omp_offload.cont");
9922     llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
9923     CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
9924 
9925     CGF.EmitBlock(OffloadFailedBlock);
9926     if (RequiresOuterTask) {
9927       CapturedVars.clear();
9928       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9929     }
9930     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
9931     CGF.EmitBranch(OffloadContBlock);
9932 
9933     CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
9934   };
9935 
9936   // Notify that the host version must be executed.
9937   auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
9938                     RequiresOuterTask](CodeGenFunction &CGF,
9939                                        PrePostActionTy &) {
9940     if (RequiresOuterTask) {
9941       CapturedVars.clear();
9942       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9943     }
9944     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
9945   };
9946 
9947   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
9948                           &CapturedVars, RequiresOuterTask,
9949                           &CS](CodeGenFunction &CGF, PrePostActionTy &) {
9950     // Fill up the arrays with all the captured variables.
9951     MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
9952     MappableExprsHandler::MapValuesArrayTy Pointers;
9953     MappableExprsHandler::MapValuesArrayTy Sizes;
9954     MappableExprsHandler::MapFlagsArrayTy MapTypes;
9955 
9956     // Get mappable expression information.
9957     MappableExprsHandler MEHandler(D, CGF);
9958     llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
9959 
9960     auto RI = CS.getCapturedRecordDecl()->field_begin();
9961     auto CV = CapturedVars.begin();
9962     for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
9963                                               CE = CS.capture_end();
9964          CI != CE; ++CI, ++RI, ++CV) {
9965       MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
9966       MappableExprsHandler::MapValuesArrayTy CurPointers;
9967       MappableExprsHandler::MapValuesArrayTy CurSizes;
9968       MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
9969       MappableExprsHandler::StructRangeInfoTy PartialStruct;
9970 
9971       // VLA sizes are passed to the outlined region by copy and do not have map
9972       // information associated.
9973       if (CI->capturesVariableArrayType()) {
9974         CurBasePointers.push_back(*CV);
9975         CurPointers.push_back(*CV);
9976         CurSizes.push_back(CGF.Builder.CreateIntCast(
9977             CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
9978         // Copy to the device as an argument. No need to retrieve it.
9979         CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
9980                               MappableExprsHandler::OMP_MAP_TARGET_PARAM |
9981                               MappableExprsHandler::OMP_MAP_IMPLICIT);
9982       } else {
9983         // If we have any information in the map clause, we use it, otherwise we
9984         // just do a default mapping.
9985         MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
9986                                          CurSizes, CurMapTypes, PartialStruct);
9987         if (CurBasePointers.empty())
9988           MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
9989                                            CurPointers, CurSizes, CurMapTypes);
9990         // Generate correct mapping for variables captured by reference in
9991         // lambdas.
9992         if (CI->capturesVariable())
9993           MEHandler.generateInfoForLambdaCaptures(
9994               CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
9995               CurMapTypes, LambdaPointers);
9996       }
9997       // We expect to have at least an element of information for this capture.
9998       assert(!CurBasePointers.empty() &&
9999              "Non-existing map pointer for capture!");
10000       assert(CurBasePointers.size() == CurPointers.size() &&
10001              CurBasePointers.size() == CurSizes.size() &&
10002              CurBasePointers.size() == CurMapTypes.size() &&
10003              "Inconsistent map information sizes!");
10004 
10005       // If there is an entry in PartialStruct it means we have a struct with
10006       // individual members mapped. Emit an extra combined entry.
10007       if (PartialStruct.Base.isValid())
10008         MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
10009                                     CurMapTypes, PartialStruct);
10010 
10011       // We need to append the results of this capture to what we already have.
10012       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
10013       Pointers.append(CurPointers.begin(), CurPointers.end());
10014       Sizes.append(CurSizes.begin(), CurSizes.end());
10015       MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
10016     }
10017     // Adjust MEMBER_OF flags for the lambdas captures.
10018     MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
10019                                               Pointers, MapTypes);
10020     // Map other list items in the map clause which are not captured variables
10021     // but "declare target link" global variables.
10022     MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
10023                                                MapTypes);
10024 
10025     TargetDataInfo Info;
10026     // Fill up the arrays and create the arguments.
10027     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
10028     emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
10029                                  Info.PointersArray, Info.SizesArray,
10030                                  Info.MapTypesArray, Info);
10031     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10032     InputInfo.BasePointersArray =
10033         Address(Info.BasePointersArray, CGM.getPointerAlign());
10034     InputInfo.PointersArray =
10035         Address(Info.PointersArray, CGM.getPointerAlign());
10036     InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
10037     MapTypesArray = Info.MapTypesArray;
10038     if (RequiresOuterTask)
10039       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10040     else
10041       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10042   };
10043 
10044   auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
10045                              CodeGenFunction &CGF, PrePostActionTy &) {
10046     if (RequiresOuterTask) {
10047       CodeGenFunction::OMPTargetDataInfo InputInfo;
10048       CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
10049     } else {
10050       emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
10051     }
10052   };
10053 
10054   // If we have a target function ID it means that we need to support
10055   // offloading, otherwise, just execute on the host. We need to execute on host
10056   // regardless of the conditional in the if clause if, e.g., the user do not
10057   // specify target triples.
10058   if (OutlinedFnID) {
10059     if (IfCond) {
10060       emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
10061     } else {
10062       RegionCodeGenTy ThenRCG(TargetThenGen);
10063       ThenRCG(CGF);
10064     }
10065   } else {
10066     RegionCodeGenTy ElseRCG(TargetElseGen);
10067     ElseRCG(CGF);
10068   }
10069 }
10070 
10071 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
10072                                                     StringRef ParentName) {
10073   if (!S)
10074     return;
10075 
10076   // Codegen OMP target directives that offload compute to the device.
10077   bool RequiresDeviceCodegen =
10078       isa<OMPExecutableDirective>(S) &&
10079       isOpenMPTargetExecutionDirective(
10080           cast<OMPExecutableDirective>(S)->getDirectiveKind());
10081 
10082   if (RequiresDeviceCodegen) {
10083     const auto &E = *cast<OMPExecutableDirective>(S);
10084     unsigned DeviceID;
10085     unsigned FileID;
10086     unsigned Line;
10087     getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
10088                              FileID, Line);
10089 
10090     // Is this a target region that should not be emitted as an entry point? If
10091     // so just signal we are done with this target region.
10092     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
10093                                                             ParentName, Line))
10094       return;
10095 
10096     switch (E.getDirectiveKind()) {
10097     case OMPD_target:
10098       CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
10099                                                    cast<OMPTargetDirective>(E));
10100       break;
10101     case OMPD_target_parallel:
10102       CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
10103           CGM, ParentName, cast<OMPTargetParallelDirective>(E));
10104       break;
10105     case OMPD_target_teams:
10106       CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
10107           CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
10108       break;
10109     case OMPD_target_teams_distribute:
10110       CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
10111           CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
10112       break;
10113     case OMPD_target_teams_distribute_simd:
10114       CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
10115           CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
10116       break;
10117     case OMPD_target_parallel_for:
10118       CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
10119           CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
10120       break;
10121     case OMPD_target_parallel_for_simd:
10122       CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
10123           CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
10124       break;
10125     case OMPD_target_simd:
10126       CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
10127           CGM, ParentName, cast<OMPTargetSimdDirective>(E));
10128       break;
10129     case OMPD_target_teams_distribute_parallel_for:
10130       CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
10131           CGM, ParentName,
10132           cast<OMPTargetTeamsDistributeParallelForDirective>(E));
10133       break;
10134     case OMPD_target_teams_distribute_parallel_for_simd:
10135       CodeGenFunction::
10136           EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
10137               CGM, ParentName,
10138               cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
10139       break;
10140     case OMPD_parallel:
10141     case OMPD_for:
10142     case OMPD_parallel_for:
10143     case OMPD_parallel_master:
10144     case OMPD_parallel_sections:
10145     case OMPD_for_simd:
10146     case OMPD_parallel_for_simd:
10147     case OMPD_cancel:
10148     case OMPD_cancellation_point:
10149     case OMPD_ordered:
10150     case OMPD_threadprivate:
10151     case OMPD_allocate:
10152     case OMPD_task:
10153     case OMPD_simd:
10154     case OMPD_sections:
10155     case OMPD_section:
10156     case OMPD_single:
10157     case OMPD_master:
10158     case OMPD_critical:
10159     case OMPD_taskyield:
10160     case OMPD_barrier:
10161     case OMPD_taskwait:
10162     case OMPD_taskgroup:
10163     case OMPD_atomic:
10164     case OMPD_flush:
10165     case OMPD_depobj:
10166     case OMPD_scan:
10167     case OMPD_teams:
10168     case OMPD_target_data:
10169     case OMPD_target_exit_data:
10170     case OMPD_target_enter_data:
10171     case OMPD_distribute:
10172     case OMPD_distribute_simd:
10173     case OMPD_distribute_parallel_for:
10174     case OMPD_distribute_parallel_for_simd:
10175     case OMPD_teams_distribute:
10176     case OMPD_teams_distribute_simd:
10177     case OMPD_teams_distribute_parallel_for:
10178     case OMPD_teams_distribute_parallel_for_simd:
10179     case OMPD_target_update:
10180     case OMPD_declare_simd:
10181     case OMPD_declare_variant:
10182     case OMPD_begin_declare_variant:
10183     case OMPD_end_declare_variant:
10184     case OMPD_declare_target:
10185     case OMPD_end_declare_target:
10186     case OMPD_declare_reduction:
10187     case OMPD_declare_mapper:
10188     case OMPD_taskloop:
10189     case OMPD_taskloop_simd:
10190     case OMPD_master_taskloop:
10191     case OMPD_master_taskloop_simd:
10192     case OMPD_parallel_master_taskloop:
10193     case OMPD_parallel_master_taskloop_simd:
10194     case OMPD_requires:
10195     case OMPD_unknown:
10196       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
10197     }
10198     return;
10199   }
10200 
10201   if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
10202     if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
10203       return;
10204 
10205     scanForTargetRegionsFunctions(
10206         E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
10207     return;
10208   }
10209 
10210   // If this is a lambda function, look into its body.
10211   if (const auto *L = dyn_cast<LambdaExpr>(S))
10212     S = L->getBody();
10213 
10214   // Keep looking for target regions recursively.
10215   for (const Stmt *II : S->children())
10216     scanForTargetRegionsFunctions(II, ParentName);
10217 }
10218 
10219 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10220   // If emitting code for the host, we do not process FD here. Instead we do
10221   // the normal code generation.
10222   if (!CGM.getLangOpts().OpenMPIsDevice) {
10223     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
10224       Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10225           OMPDeclareTargetDeclAttr::getDeviceType(FD);
10226       // Do not emit device_type(nohost) functions for the host.
10227       if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10228         return true;
10229     }
10230     return false;
10231   }
10232 
10233   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10234   // Try to detect target regions in the function.
10235   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10236     StringRef Name = CGM.getMangledName(GD);
10237     scanForTargetRegionsFunctions(FD->getBody(), Name);
10238     Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10239         OMPDeclareTargetDeclAttr::getDeviceType(FD);
10240     // Do not emit device_type(nohost) functions for the host.
10241     if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10242       return true;
10243   }
10244 
10245   // Do not to emit function if it is not marked as declare target.
10246   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10247          AlreadyEmittedTargetDecls.count(VD) == 0;
10248 }
10249 
10250 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10251   if (!CGM.getLangOpts().OpenMPIsDevice)
10252     return false;
10253 
10254   // Check if there are Ctors/Dtors in this declaration and look for target
10255   // regions in it. We use the complete variant to produce the kernel name
10256   // mangling.
10257   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10258   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10259     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10260       StringRef ParentName =
10261           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10262       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10263     }
10264     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10265       StringRef ParentName =
10266           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10267       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10268     }
10269   }
10270 
10271   // Do not to emit variable if it is not marked as declare target.
10272   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10273       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10274           cast<VarDecl>(GD.getDecl()));
10275   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10276       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10277        HasRequiresUnifiedSharedMemory)) {
10278     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10279     return true;
10280   }
10281   return false;
10282 }
10283 
10284 llvm::Constant *
10285 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
10286                                                 const VarDecl *VD) {
10287   assert(VD->getType().isConstant(CGM.getContext()) &&
10288          "Expected constant variable.");
10289   StringRef VarName;
10290   llvm::Constant *Addr;
10291   llvm::GlobalValue::LinkageTypes Linkage;
10292   QualType Ty = VD->getType();
10293   SmallString<128> Buffer;
10294   {
10295     unsigned DeviceID;
10296     unsigned FileID;
10297     unsigned Line;
10298     getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
10299                              FileID, Line);
10300     llvm::raw_svector_ostream OS(Buffer);
10301     OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
10302        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
10303     VarName = OS.str();
10304   }
10305   Linkage = llvm::GlobalValue::InternalLinkage;
10306   Addr =
10307       getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
10308                                   getDefaultFirstprivateAddressSpace());
10309   cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
10310   CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
10311   CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
10312   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10313       VarName, Addr, VarSize,
10314       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
10315   return Addr;
10316 }
10317 
10318 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10319                                                    llvm::Constant *Addr) {
10320   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10321       !CGM.getLangOpts().OpenMPIsDevice)
10322     return;
10323   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10324       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10325   if (!Res) {
10326     if (CGM.getLangOpts().OpenMPIsDevice) {
10327       // Register non-target variables being emitted in device code (debug info
10328       // may cause this).
10329       StringRef VarName = CGM.getMangledName(VD);
10330       EmittedNonTargetVariables.try_emplace(VarName, Addr);
10331     }
10332     return;
10333   }
10334   // Register declare target variables.
10335   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
10336   StringRef VarName;
10337   CharUnits VarSize;
10338   llvm::GlobalValue::LinkageTypes Linkage;
10339 
10340   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10341       !HasRequiresUnifiedSharedMemory) {
10342     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10343     VarName = CGM.getMangledName(VD);
10344     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
10345       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
10346       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
10347     } else {
10348       VarSize = CharUnits::Zero();
10349     }
10350     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
10351     // Temp solution to prevent optimizations of the internal variables.
10352     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
10353       std::string RefName = getName({VarName, "ref"});
10354       if (!CGM.GetGlobalValue(RefName)) {
10355         llvm::Constant *AddrRef =
10356             getOrCreateInternalVariable(Addr->getType(), RefName);
10357         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
10358         GVAddrRef->setConstant(/*Val=*/true);
10359         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
10360         GVAddrRef->setInitializer(Addr);
10361         CGM.addCompilerUsedGlobal(GVAddrRef);
10362       }
10363     }
10364   } else {
10365     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
10366             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10367              HasRequiresUnifiedSharedMemory)) &&
10368            "Declare target attribute must link or to with unified memory.");
10369     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
10370       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
10371     else
10372       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10373 
10374     if (CGM.getLangOpts().OpenMPIsDevice) {
10375       VarName = Addr->getName();
10376       Addr = nullptr;
10377     } else {
10378       VarName = getAddrOfDeclareTargetVar(VD).getName();
10379       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
10380     }
10381     VarSize = CGM.getPointerSize();
10382     Linkage = llvm::GlobalValue::WeakAnyLinkage;
10383   }
10384 
10385   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10386       VarName, Addr, VarSize, Flags, Linkage);
10387 }
10388 
10389 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10390   if (isa<FunctionDecl>(GD.getDecl()) ||
10391       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10392     return emitTargetFunctions(GD);
10393 
10394   return emitTargetGlobalVariable(GD);
10395 }
10396 
10397 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10398   for (const VarDecl *VD : DeferredGlobalVariables) {
10399     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10400         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10401     if (!Res)
10402       continue;
10403     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10404         !HasRequiresUnifiedSharedMemory) {
10405       CGM.EmitGlobal(VD);
10406     } else {
10407       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10408               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10409                HasRequiresUnifiedSharedMemory)) &&
10410              "Expected link clause or to clause with unified memory.");
10411       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10412     }
10413   }
10414 }
10415 
/// This implementation performs no adjustment. In asserts-enabled builds it
/// only checks that \p D is a target execution directive; \p CGF is not used.
void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  // Sanity check only: callers are expected to pass target-based directives.
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}
10421 
10422 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10423   for (const OMPClause *Clause : D->clauselists()) {
10424     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10425       HasRequiresUnifiedSharedMemory = true;
10426     } else if (const auto *AC =
10427                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10428       switch (AC->getAtomicDefaultMemOrderKind()) {
10429       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10430         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10431         break;
10432       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10433         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10434         break;
10435       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10436         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10437         break;
10438       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10439         break;
10440       }
10441     }
10442   }
10443 }
10444 
/// Return the current value of RequiresAtomicOrdering, which
/// processRequiresDirective() updates when it sees an atomic default memory
/// order clause.
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
10448 
10449 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10450                                                        LangAS &AS) {
10451   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10452     return false;
10453   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10454   switch(A->getAllocatorType()) {
10455   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10456   // Not supported, fallback to the default mem space.
10457   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10458   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10459   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10460   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10461   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10462   case OMPAllocateDeclAttr::OMPConstMemAlloc:
10463   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10464     AS = LangAS::Default;
10465     return true;
10466   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10467     llvm_unreachable("Expected predefined allocator for the variables with the "
10468                      "static storage.");
10469   }
10470   return false;
10471 }
10472 
/// Return the HasRequiresUnifiedSharedMemory flag, which
/// processRequiresDirective() sets when it sees a unified_shared_memory
/// clause.
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
10476 
10477 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10478     CodeGenModule &CGM)
10479     : CGM(CGM) {
10480   if (CGM.getLangOpts().OpenMPIsDevice) {
10481     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10482     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10483   }
10484 }
10485 
10486 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10487   if (CGM.getLangOpts().OpenMPIsDevice)
10488     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10489 }
10490 
10491 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10492   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
10493     return true;
10494 
10495   const auto *D = cast<FunctionDecl>(GD.getDecl());
10496   // Do not to emit function if it is marked as declare target as it was already
10497   // emitted.
10498   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10499     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10500       if (auto *F = dyn_cast_or_null<llvm::Function>(
10501               CGM.GetGlobalValue(CGM.getMangledName(GD))))
10502         return !F->isDeclaration();
10503       return false;
10504     }
10505     return true;
10506   }
10507 
10508   return !AlreadyEmittedTargetDecls.insert(D).second;
10509 }
10510 
10511 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
10512   // If we don't have entries or if we are emitting code for the device, we
10513   // don't need to do anything.
10514   if (CGM.getLangOpts().OMPTargetTriples.empty() ||
10515       CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
10516       (OffloadEntriesInfoManager.empty() &&
10517        !HasEmittedDeclareTargetRegion &&
10518        !HasEmittedTargetRegion))
10519     return nullptr;
10520 
10521   // Create and register the function that handles the requires directives.
10522   ASTContext &C = CGM.getContext();
10523 
10524   llvm::Function *RequiresRegFn;
10525   {
10526     CodeGenFunction CGF(CGM);
10527     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
10528     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
10529     std::string ReqName = getName({"omp_offloading", "requires_reg"});
10530     RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI);
10531     CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
10532     OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
10533     // TODO: check for other requires clauses.
10534     // The requires directive takes effect only when a target region is
10535     // present in the compilation unit. Otherwise it is ignored and not
10536     // passed to the runtime. This avoids the runtime from throwing an error
10537     // for mismatching requires clauses across compilation units that don't
10538     // contain at least 1 target region.
10539     assert((HasEmittedTargetRegion ||
10540             HasEmittedDeclareTargetRegion ||
10541             !OffloadEntriesInfoManager.empty()) &&
10542            "Target or declare target region expected.");
10543     if (HasRequiresUnifiedSharedMemory)
10544       Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
10545     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires),
10546         llvm::ConstantInt::get(CGM.Int64Ty, Flags));
10547     CGF.FinishFunction();
10548   }
10549   return RequiresRegFn;
10550 }
10551 
10552 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10553                                     const OMPExecutableDirective &D,
10554                                     SourceLocation Loc,
10555                                     llvm::Function *OutlinedFn,
10556                                     ArrayRef<llvm::Value *> CapturedVars) {
10557   if (!CGF.HaveInsertPoint())
10558     return;
10559 
10560   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10561   CodeGenFunction::RunCleanupsScope Scope(CGF);
10562 
10563   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10564   llvm::Value *Args[] = {
10565       RTLoc,
10566       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10567       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10568   llvm::SmallVector<llvm::Value *, 16> RealArgs;
10569   RealArgs.append(std::begin(Args), std::end(Args));
10570   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10571 
10572   llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
10573   CGF.EmitRuntimeCall(RTLFn, RealArgs);
10574 }
10575 
10576 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10577                                          const Expr *NumTeams,
10578                                          const Expr *ThreadLimit,
10579                                          SourceLocation Loc) {
10580   if (!CGF.HaveInsertPoint())
10581     return;
10582 
10583   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10584 
10585   llvm::Value *NumTeamsVal =
10586       NumTeams
10587           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10588                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10589           : CGF.Builder.getInt32(0);
10590 
10591   llvm::Value *ThreadLimitVal =
10592       ThreadLimit
10593           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10594                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10595           : CGF.Builder.getInt32(0);
10596 
10597   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
10598   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10599                                      ThreadLimitVal};
10600   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
10601                       PushNumTeamsArgs);
10602 }
10603 
10604 void CGOpenMPRuntime::emitTargetDataCalls(
10605     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10606     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
10607   if (!CGF.HaveInsertPoint())
10608     return;
10609 
10610   // Action used to replace the default codegen action and turn privatization
10611   // off.
10612   PrePostActionTy NoPrivAction;
10613 
10614   // Generate the code for the opening of the data environment. Capture all the
10615   // arguments of the runtime call by reference because they are used in the
10616   // closing of the region.
10617   auto &&BeginThenGen = [this, &D, Device, &Info,
10618                          &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
10619     // Fill up the arrays with all the mapped variables.
10620     MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
10621     MappableExprsHandler::MapValuesArrayTy Pointers;
10622     MappableExprsHandler::MapValuesArrayTy Sizes;
10623     MappableExprsHandler::MapFlagsArrayTy MapTypes;
10624 
10625     // Get map clause information.
10626     MappableExprsHandler MCHandler(D, CGF);
10627     MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
10628 
10629     // Fill up the arrays and create the arguments.
10630     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
10631 
10632     llvm::Value *BasePointersArrayArg = nullptr;
10633     llvm::Value *PointersArrayArg = nullptr;
10634     llvm::Value *SizesArrayArg = nullptr;
10635     llvm::Value *MapTypesArrayArg = nullptr;
10636     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
10637                                  SizesArrayArg, MapTypesArrayArg, Info);
10638 
10639     // Emit device ID if any.
10640     llvm::Value *DeviceID = nullptr;
10641     if (Device) {
10642       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10643                                            CGF.Int64Ty, /*isSigned=*/true);
10644     } else {
10645       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10646     }
10647 
10648     // Emit the number of elements in the offloading arrays.
10649     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
10650 
10651     llvm::Value *OffloadingArgs[] = {
10652         DeviceID,         PointerNum,    BasePointersArrayArg,
10653         PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
10654     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin),
10655                         OffloadingArgs);
10656 
10657     // If device pointer privatization is required, emit the body of the region
10658     // here. It will have to be duplicated: with and without privatization.
10659     if (!Info.CaptureDeviceAddrMap.empty())
10660       CodeGen(CGF);
10661   };
10662 
10663   // Generate code for the closing of the data region.
10664   auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
10665                                             PrePostActionTy &) {
10666     assert(Info.isValid() && "Invalid data environment closing arguments.");
10667 
10668     llvm::Value *BasePointersArrayArg = nullptr;
10669     llvm::Value *PointersArrayArg = nullptr;
10670     llvm::Value *SizesArrayArg = nullptr;
10671     llvm::Value *MapTypesArrayArg = nullptr;
10672     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
10673                                  SizesArrayArg, MapTypesArrayArg, Info);
10674 
10675     // Emit device ID if any.
10676     llvm::Value *DeviceID = nullptr;
10677     if (Device) {
10678       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10679                                            CGF.Int64Ty, /*isSigned=*/true);
10680     } else {
10681       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10682     }
10683 
10684     // Emit the number of elements in the offloading arrays.
10685     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
10686 
10687     llvm::Value *OffloadingArgs[] = {
10688         DeviceID,         PointerNum,    BasePointersArrayArg,
10689         PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
10690     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end),
10691                         OffloadingArgs);
10692   };
10693 
10694   // If we need device pointer privatization, we need to emit the body of the
10695   // region with no privatization in the 'else' branch of the conditional.
10696   // Otherwise, we don't have to do anything.
10697   auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
10698                                                          PrePostActionTy &) {
10699     if (!Info.CaptureDeviceAddrMap.empty()) {
10700       CodeGen.setAction(NoPrivAction);
10701       CodeGen(CGF);
10702     }
10703   };
10704 
10705   // We don't have to do anything to close the region if the if clause evaluates
10706   // to false.
10707   auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
10708 
10709   if (IfCond) {
10710     emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
10711   } else {
10712     RegionCodeGenTy RCG(BeginThenGen);
10713     RCG(CGF);
10714   }
10715 
10716   // If we don't require privatization of device pointers, we emit the body in
10717   // between the runtime calls. This avoids duplicating the body code.
10718   if (Info.CaptureDeviceAddrMap.empty()) {
10719     CodeGen.setAction(NoPrivAction);
10720     CodeGen(CGF);
10721   }
10722 
10723   if (IfCond) {
10724     emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
10725   } else {
10726     RegionCodeGenTy RCG(EndThenGen);
10727     RCG(CGF);
10728   }
10729 }
10730 
/// Emits the runtime call for a standalone data-mapping directive:
/// 'target enter data', 'target exit data' or 'target update'.
///
/// \param CGF    Function being emitted; nothing happens when it has no
///               insertion point.
/// \param D      The directive; asserted to be one of the three kinds above.
/// \param IfCond Expression of the 'if' clause, or null. When present, the
///               whole sequence is emitted under that condition and nothing
///               is emitted on the false branch.
/// \param Device Expression of the 'device' clause, or null, in which case
///               OMP_DEVICEID_UNDEF is passed to the runtime.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // State shared by the two lambdas below: TargetThenGen fills it in and
  // ThenGen reads it when building the runtime call.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo,
                    &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    llvm::Value *OffloadingArgs[] = {DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray};

    // Select the right runtime function call for each expected standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    OpenMPRTLFunction RTLFn;
    // Only the three standalone data directives select a runtime function.
    // Every other directive kind is listed explicitly and is unreachable;
    // presumably the switch has no 'default' so that the compiler flags any
    // newly added directive kind here - confirm before collapsing the list.
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
                        : OMPRTL__tgt_target_data_begin;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
                        : OMPRTL__tgt_target_data_end;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
                        : OMPRTL__tgt_target_data_update;
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
  };

  // Builds the offloading arrays from the directive's map clauses, publishes
  // them through InputInfo / MapTypesArray, and then runs ThenGen either as a
  // task-based directive (when a 'depend' clause is present) or inlined.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Hand the array arguments over to ThenGen, wrapped as pointer-aligned
    // addresses.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (D.hasClausesOfKind<OMPDependClause>())
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    // The 'else' generator is empty: nothing is emitted when the condition
    // is false.
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
10893 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  ///
  /// The enumerator order is observable: a value-initialized ParamKindTy
  /// (written `{}`, as in the getAArch64MTV call inside getNDSWDS) is the
  /// first enumerator, LinearWithVarStride.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    /// Classification of the parameter; Vector unless set otherwise.
    ParamKindTy Kind = Vector;
    /// Value printed after the kind letter by the name manglers in this file
    /// for Linear and LinearWithVarStride parameters. Presumably the constant
    /// step for Linear and the position of the stride argument for
    /// LinearWithVarStride - confirm against the code that fills it in.
    llvm::APSInt StrideOrArg;
    /// When non-zero, printed after an 'a' in the mangled parameter sequence.
    llvm::APSInt Alignment;
  };
} // namespace
10904 
10905 static unsigned evaluateCDTSize(const FunctionDecl *FD,
10906                                 ArrayRef<ParamAttrTy> ParamAttrs) {
10907   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10908   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10909   // of that clause. The VLEN value must be power of 2.
10910   // In other case the notion of the function`s "characteristic data type" (CDT)
10911   // is used to compute the vector length.
10912   // CDT is defined in the following order:
10913   //   a) For non-void function, the CDT is the return type.
10914   //   b) If the function has any non-uniform, non-linear parameters, then the
10915   //   CDT is the type of the first such parameter.
10916   //   c) If the CDT determined by a) or b) above is struct, union, or class
10917   //   type which is pass-by-value (except for the type that maps to the
10918   //   built-in complex data type), the characteristic data type is int.
10919   //   d) If none of the above three cases is applicable, the CDT is int.
10920   // The VLEN is then determined based on the CDT and the size of vector
10921   // register of that ISA for which current vector version is generated. The
10922   // VLEN is computed using the formula below:
10923   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
10924   // where vector register size specified in section 3.2.1 Registers and the
10925   // Stack Frame of original AMD64 ABI document.
10926   QualType RetType = FD->getReturnType();
10927   if (RetType.isNull())
10928     return 0;
10929   ASTContext &C = FD->getASTContext();
10930   QualType CDT;
10931   if (!RetType.isNull() && !RetType->isVoidType()) {
10932     CDT = RetType;
10933   } else {
10934     unsigned Offset = 0;
10935     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10936       if (ParamAttrs[Offset].Kind == Vector)
10937         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10938       ++Offset;
10939     }
10940     if (CDT.isNull()) {
10941       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10942         if (ParamAttrs[I + Offset].Kind == Vector) {
10943           CDT = FD->getParamDecl(I)->getType();
10944           break;
10945         }
10946       }
10947     }
10948   }
10949   if (CDT.isNull())
10950     CDT = C.IntTy;
10951   CDT = CDT->getCanonicalTypeUnqualified();
10952   if (CDT->isRecordType() || CDT->isUnionType())
10953     CDT = C.IntTy;
10954   return C.getTypeSize(CDT);
10955 }
10956 
10957 static void
10958 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10959                            const llvm::APSInt &VLENVal,
10960                            ArrayRef<ParamAttrTy> ParamAttrs,
10961                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
10962   struct ISADataTy {
10963     char ISA;
10964     unsigned VecRegSize;
10965   };
10966   ISADataTy ISAData[] = {
10967       {
10968           'b', 128
10969       }, // SSE
10970       {
10971           'c', 256
10972       }, // AVX
10973       {
10974           'd', 256
10975       }, // AVX2
10976       {
10977           'e', 512
10978       }, // AVX512
10979   };
10980   llvm::SmallVector<char, 2> Masked;
10981   switch (State) {
10982   case OMPDeclareSimdDeclAttr::BS_Undefined:
10983     Masked.push_back('N');
10984     Masked.push_back('M');
10985     break;
10986   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10987     Masked.push_back('N');
10988     break;
10989   case OMPDeclareSimdDeclAttr::BS_Inbranch:
10990     Masked.push_back('M');
10991     break;
10992   }
10993   for (char Mask : Masked) {
10994     for (const ISADataTy &Data : ISAData) {
10995       SmallString<256> Buffer;
10996       llvm::raw_svector_ostream Out(Buffer);
10997       Out << "_ZGV" << Data.ISA << Mask;
10998       if (!VLENVal) {
10999         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11000         assert(NumElts && "Non-zero simdlen/cdtsize expected");
11001         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11002       } else {
11003         Out << VLENVal;
11004       }
11005       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11006         switch (ParamAttr.Kind){
11007         case LinearWithVarStride:
11008           Out << 's' << ParamAttr.StrideOrArg;
11009           break;
11010         case Linear:
11011           Out << 'l';
11012           if (!!ParamAttr.StrideOrArg)
11013             Out << ParamAttr.StrideOrArg;
11014           break;
11015         case Uniform:
11016           Out << 'u';
11017           break;
11018         case Vector:
11019           Out << 'v';
11020           break;
11021         }
11022         if (!!ParamAttr.Alignment)
11023           Out << 'a' << ParamAttr.Alignment;
11024       }
11025       Out << '_' << Fn->getName();
11026       Fn->addFnAttr(Out.str());
11027     }
11028   }
11029 }
11030 
// These are the functions needed to mangle the names of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11036 
11037 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11038 ///
11039 /// TODO: Need to implement the behavior for reference marked with a
11040 /// var or no linear modifiers (1.b in the section). For this, we
11041 /// need to extend ParamKindTy to support the linear modifiers.
11042 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11043   QT = QT.getCanonicalType();
11044 
11045   if (QT->isVoidType())
11046     return false;
11047 
11048   if (Kind == ParamKindTy::Uniform)
11049     return false;
11050 
11051   if (Kind == ParamKindTy::Linear)
11052     return false;
11053 
11054   // TODO: Handle linear references with modifiers
11055 
11056   if (Kind == ParamKindTy::LinearWithVarStride)
11057     return false;
11058 
11059   return true;
11060 }
11061 
11062 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11063 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11064   QT = QT.getCanonicalType();
11065   unsigned Size = C.getTypeSize(QT);
11066 
11067   // Only scalars and complex within 16 bytes wide set PVB to true.
11068   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11069     return false;
11070 
11071   if (QT->isFloatingType())
11072     return true;
11073 
11074   if (QT->isIntegerType())
11075     return true;
11076 
11077   if (QT->isPointerType())
11078     return true;
11079 
11080   // TODO: Add support for complex types (section 3.1.2, item 2).
11081 
11082   return false;
11083 }
11084 
11085 /// Computes the lane size (LS) of a return type or of an input parameter,
11086 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11087 /// TODO: Add support for references, section 3.2.1, item 1.
11088 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11089   if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11090     QualType PTy = QT.getCanonicalType()->getPointeeType();
11091     if (getAArch64PBV(PTy, C))
11092       return C.getTypeSize(PTy);
11093   }
11094   if (getAArch64PBV(QT, C))
11095     return C.getTypeSize(QT);
11096 
11097   return C.getTypeSize(C.getUIntPtrType());
11098 }
11099 
11100 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11101 // signature of the scalar function, as defined in 3.2.2 of the
11102 // AAVFABI.
11103 static std::tuple<unsigned, unsigned, bool>
11104 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11105   QualType RetType = FD->getReturnType().getCanonicalType();
11106 
11107   ASTContext &C = FD->getASTContext();
11108 
11109   bool OutputBecomesInput = false;
11110 
11111   llvm::SmallVector<unsigned, 8> Sizes;
11112   if (!RetType->isVoidType()) {
11113     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11114     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11115       OutputBecomesInput = true;
11116   }
11117   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11118     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11119     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11120   }
11121 
11122   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11123   // The LS of a function parameter / return value can only be a power
11124   // of 2, starting from 8 bits, up to 128.
11125   assert(std::all_of(Sizes.begin(), Sizes.end(),
11126                      [](unsigned Size) {
11127                        return Size == 8 || Size == 16 || Size == 32 ||
11128                               Size == 64 || Size == 128;
11129                      }) &&
11130          "Invalid size");
11131 
11132   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11133                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
11134                          OutputBecomesInput);
11135 }
11136 
11137 /// Mangle the parameter part of the vector function name according to
11138 /// their OpenMP classification. The mangling function is defined in
11139 /// section 3.5 of the AAVFABI.
11140 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11141   SmallString<256> Buffer;
11142   llvm::raw_svector_ostream Out(Buffer);
11143   for (const auto &ParamAttr : ParamAttrs) {
11144     switch (ParamAttr.Kind) {
11145     case LinearWithVarStride:
11146       Out << "ls" << ParamAttr.StrideOrArg;
11147       break;
11148     case Linear:
11149       Out << 'l';
11150       // Don't print the step value if it is not present or if it is
11151       // equal to 1.
11152       if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1)
11153         Out << ParamAttr.StrideOrArg;
11154       break;
11155     case Uniform:
11156       Out << 'u';
11157       break;
11158     case Vector:
11159       Out << 'v';
11160       break;
11161     }
11162 
11163     if (!!ParamAttr.Alignment)
11164       Out << 'a' << ParamAttr.Alignment;
11165   }
11166 
11167   return std::string(Out.str());
11168 }
11169 
11170 // Function used to add the attribute. The parameter `VLEN` is
11171 // templated to allow the use of "x" when targeting scalable functions
11172 // for SVE.
11173 template <typename T>
11174 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11175                                  char ISA, StringRef ParSeq,
11176                                  StringRef MangledName, bool OutputBecomesInput,
11177                                  llvm::Function *Fn) {
11178   SmallString<256> Buffer;
11179   llvm::raw_svector_ostream Out(Buffer);
11180   Out << Prefix << ISA << LMask << VLEN;
11181   if (OutputBecomesInput)
11182     Out << "v";
11183   Out << ParSeq << "_" << MangledName;
11184   Fn->addFnAttr(Out.str());
11185 }
11186 
11187 // Helper function to generate the Advanced SIMD names depending on
11188 // the value of the NDS when simdlen is not present.
11189 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11190                                       StringRef Prefix, char ISA,
11191                                       StringRef ParSeq, StringRef MangledName,
11192                                       bool OutputBecomesInput,
11193                                       llvm::Function *Fn) {
11194   switch (NDS) {
11195   case 8:
11196     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11197                          OutputBecomesInput, Fn);
11198     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11199                          OutputBecomesInput, Fn);
11200     break;
11201   case 16:
11202     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11203                          OutputBecomesInput, Fn);
11204     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11205                          OutputBecomesInput, Fn);
11206     break;
11207   case 32:
11208     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11209                          OutputBecomesInput, Fn);
11210     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11211                          OutputBecomesInput, Fn);
11212     break;
11213   case 64:
11214   case 128:
11215     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11216                          OutputBecomesInput, Fn);
11217     break;
11218   default:
11219     llvm_unreachable("Scalar type is too wide.");
11220   }
11221 }
11222 
11223 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
11224 static void emitAArch64DeclareSimdFunction(
11225     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
11226     ArrayRef<ParamAttrTy> ParamAttrs,
11227     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
11228     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
11229 
11230   // Get basic data for building the vector signature.
11231   const auto Data = getNDSWDS(FD, ParamAttrs);
11232   const unsigned NDS = std::get<0>(Data);
11233   const unsigned WDS = std::get<1>(Data);
11234   const bool OutputBecomesInput = std::get<2>(Data);
11235 
11236   // Check the values provided via `simdlen` by the user.
11237   // 1. A `simdlen(1)` doesn't produce vector signatures,
11238   if (UserVLEN == 1) {
11239     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11240         DiagnosticsEngine::Warning,
11241         "The clause simdlen(1) has no effect when targeting aarch64.");
11242     CGM.getDiags().Report(SLoc, DiagID);
11243     return;
11244   }
11245 
11246   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
11247   // Advanced SIMD output.
11248   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
11249     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11250         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
11251                                     "power of 2 when targeting Advanced SIMD.");
11252     CGM.getDiags().Report(SLoc, DiagID);
11253     return;
11254   }
11255 
11256   // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
11257   // limits.
11258   if (ISA == 's' && UserVLEN != 0) {
11259     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
11260       unsigned DiagID = CGM.getDiags().getCustomDiagID(
11261           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
11262                                       "lanes in the architectural constraints "
11263                                       "for SVE (min is 128-bit, max is "
11264                                       "2048-bit, by steps of 128-bit)");
11265       CGM.getDiags().Report(SLoc, DiagID) << WDS;
11266       return;
11267     }
11268   }
11269 
11270   // Sort out parameter sequence.
11271   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
11272   StringRef Prefix = "_ZGV";
11273   // Generate simdlen from user input (if any).
11274   if (UserVLEN) {
11275     if (ISA == 's') {
11276       // SVE generates only a masked function.
11277       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11278                            OutputBecomesInput, Fn);
11279     } else {
11280       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11281       // Advanced SIMD generates one or two functions, depending on
11282       // the `[not]inbranch` clause.
11283       switch (State) {
11284       case OMPDeclareSimdDeclAttr::BS_Undefined:
11285         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11286                              OutputBecomesInput, Fn);
11287         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11288                              OutputBecomesInput, Fn);
11289         break;
11290       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11291         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11292                              OutputBecomesInput, Fn);
11293         break;
11294       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11295         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11296                              OutputBecomesInput, Fn);
11297         break;
11298       }
11299     }
11300   } else {
11301     // If no user simdlen is provided, follow the AAVFABI rules for
11302     // generating the vector length.
11303     if (ISA == 's') {
11304       // SVE, section 3.4.1, item 1.
11305       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
11306                            OutputBecomesInput, Fn);
11307     } else {
11308       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11309       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
11310       // two vector names depending on the use of the clause
11311       // `[not]inbranch`.
11312       switch (State) {
11313       case OMPDeclareSimdDeclAttr::BS_Undefined:
11314         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11315                                   OutputBecomesInput, Fn);
11316         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11317                                   OutputBecomesInput, Fn);
11318         break;
11319       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11320         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11321                                   OutputBecomesInput, Fn);
11322         break;
11323       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11324         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11325                                   OutputBecomesInput, Fn);
11326         break;
11327       }
11328     }
11329   }
11330 }
11331 
11332 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11333                                               llvm::Function *Fn) {
11334   ASTContext &C = CGM.getContext();
11335   FD = FD->getMostRecentDecl();
11336   // Map params to their positions in function decl.
11337   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11338   if (isa<CXXMethodDecl>(FD))
11339     ParamPositions.try_emplace(FD, 0);
11340   unsigned ParamPos = ParamPositions.size();
11341   for (const ParmVarDecl *P : FD->parameters()) {
11342     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11343     ++ParamPos;
11344   }
11345   while (FD) {
11346     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11347       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11348       // Mark uniform parameters.
11349       for (const Expr *E : Attr->uniforms()) {
11350         E = E->IgnoreParenImpCasts();
11351         unsigned Pos;
11352         if (isa<CXXThisExpr>(E)) {
11353           Pos = ParamPositions[FD];
11354         } else {
11355           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11356                                 ->getCanonicalDecl();
11357           Pos = ParamPositions[PVD];
11358         }
11359         ParamAttrs[Pos].Kind = Uniform;
11360       }
11361       // Get alignment info.
11362       auto NI = Attr->alignments_begin();
11363       for (const Expr *E : Attr->aligneds()) {
11364         E = E->IgnoreParenImpCasts();
11365         unsigned Pos;
11366         QualType ParmTy;
11367         if (isa<CXXThisExpr>(E)) {
11368           Pos = ParamPositions[FD];
11369           ParmTy = E->getType();
11370         } else {
11371           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11372                                 ->getCanonicalDecl();
11373           Pos = ParamPositions[PVD];
11374           ParmTy = PVD->getType();
11375         }
11376         ParamAttrs[Pos].Alignment =
11377             (*NI)
11378                 ? (*NI)->EvaluateKnownConstInt(C)
11379                 : llvm::APSInt::getUnsigned(
11380                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11381                           .getQuantity());
11382         ++NI;
11383       }
11384       // Mark linear parameters.
11385       auto SI = Attr->steps_begin();
11386       auto MI = Attr->modifiers_begin();
11387       for (const Expr *E : Attr->linears()) {
11388         E = E->IgnoreParenImpCasts();
11389         unsigned Pos;
11390         if (isa<CXXThisExpr>(E)) {
11391           Pos = ParamPositions[FD];
11392         } else {
11393           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11394                                 ->getCanonicalDecl();
11395           Pos = ParamPositions[PVD];
11396         }
11397         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11398         ParamAttr.Kind = Linear;
11399         if (*SI) {
11400           Expr::EvalResult Result;
11401           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11402             if (const auto *DRE =
11403                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11404               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
11405                 ParamAttr.Kind = LinearWithVarStride;
11406                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
11407                     ParamPositions[StridePVD->getCanonicalDecl()]);
11408               }
11409             }
11410           } else {
11411             ParamAttr.StrideOrArg = Result.Val.getInt();
11412           }
11413         }
11414         ++SI;
11415         ++MI;
11416       }
11417       llvm::APSInt VLENVal;
11418       SourceLocation ExprLoc;
11419       const Expr *VLENExpr = Attr->getSimdlen();
11420       if (VLENExpr) {
11421         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11422         ExprLoc = VLENExpr->getExprLoc();
11423       }
11424       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11425       if (CGM.getTriple().isX86()) {
11426         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11427       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11428         unsigned VLEN = VLENVal.getExtValue();
11429         StringRef MangledName = Fn->getName();
11430         if (CGM.getTarget().hasFeature("sve"))
11431           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11432                                          MangledName, 's', 128, Fn, ExprLoc);
11433         if (CGM.getTarget().hasFeature("neon"))
11434           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11435                                          MangledName, 'n', 128, Fn, ExprLoc);
11436       }
11437     }
11438     FD = FD->getPreviousDecl();
11439   }
11440 }
11441 
11442 namespace {
11443 /// Cleanup action for doacross support.
11444 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11445 public:
11446   static const int DoacrossFinArgs = 2;
11447 
11448 private:
11449   llvm::FunctionCallee RTLFn;
11450   llvm::Value *Args[DoacrossFinArgs];
11451 
11452 public:
11453   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11454                     ArrayRef<llvm::Value *> CallArgs)
11455       : RTLFn(RTLFn) {
11456     assert(CallArgs.size() == DoacrossFinArgs);
11457     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11458   }
11459   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11460     if (!CGF.HaveInsertPoint())
11461       return;
11462     CGF.EmitRuntimeCall(RTLFn, Args);
11463   }
11464 };
11465 } // namespace
11466 
11467 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
11468                                        const OMPLoopDirective &D,
11469                                        ArrayRef<Expr *> NumIterations) {
11470   if (!CGF.HaveInsertPoint())
11471     return;
11472 
11473   ASTContext &C = CGM.getContext();
11474   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
11475   RecordDecl *RD;
11476   if (KmpDimTy.isNull()) {
11477     // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
11478     //  kmp_int64 lo; // lower
11479     //  kmp_int64 up; // upper
11480     //  kmp_int64 st; // stride
11481     // };
11482     RD = C.buildImplicitRecord("kmp_dim");
11483     RD->startDefinition();
11484     addFieldToRecordDecl(C, RD, Int64Ty);
11485     addFieldToRecordDecl(C, RD, Int64Ty);
11486     addFieldToRecordDecl(C, RD, Int64Ty);
11487     RD->completeDefinition();
11488     KmpDimTy = C.getRecordType(RD);
11489   } else {
11490     RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
11491   }
11492   llvm::APInt Size(/*numBits=*/32, NumIterations.size());
11493   QualType ArrayTy =
11494       C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);
11495 
11496   Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
11497   CGF.EmitNullInitialization(DimsAddr, ArrayTy);
11498   enum { LowerFD = 0, UpperFD, StrideFD };
11499   // Fill dims with data.
11500   for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
11501     LValue DimsLVal = CGF.MakeAddrLValue(
11502         CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
11503     // dims.upper = num_iterations;
11504     LValue UpperLVal = CGF.EmitLValueForField(
11505         DimsLVal, *std::next(RD->field_begin(), UpperFD));
11506     llvm::Value *NumIterVal = CGF.EmitScalarConversion(
11507         CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
11508         Int64Ty, NumIterations[I]->getExprLoc());
11509     CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
11510     // dims.stride = 1;
11511     LValue StrideLVal = CGF.EmitLValueForField(
11512         DimsLVal, *std::next(RD->field_begin(), StrideFD));
11513     CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
11514                           StrideLVal);
11515   }
11516 
11517   // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
11518   // kmp_int32 num_dims, struct kmp_dim * dims);
11519   llvm::Value *Args[] = {
11520       emitUpdateLocation(CGF, D.getBeginLoc()),
11521       getThreadID(CGF, D.getBeginLoc()),
11522       llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
11523       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11524           CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
11525           CGM.VoidPtrTy)};
11526 
11527   llvm::FunctionCallee RTLFn =
11528       createRuntimeFunction(OMPRTL__kmpc_doacross_init);
11529   CGF.EmitRuntimeCall(RTLFn, Args);
11530   llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
11531       emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
11532   llvm::FunctionCallee FiniRTLFn =
11533       createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
11534   CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11535                                              llvm::makeArrayRef(FiniArgs));
11536 }
11537 
11538 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11539                                           const OMPDependClause *C) {
11540   QualType Int64Ty =
11541       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11542   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11543   QualType ArrayTy = CGM.getContext().getConstantArrayType(
11544       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
11545   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11546   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11547     const Expr *CounterVal = C->getLoopData(I);
11548     assert(CounterVal);
11549     llvm::Value *CntVal = CGF.EmitScalarConversion(
11550         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11551         CounterVal->getExprLoc());
11552     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11553                           /*Volatile=*/false, Int64Ty);
11554   }
11555   llvm::Value *Args[] = {
11556       emitUpdateLocation(CGF, C->getBeginLoc()),
11557       getThreadID(CGF, C->getBeginLoc()),
11558       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
11559   llvm::FunctionCallee RTLFn;
11560   if (C->getDependencyKind() == OMPC_DEPEND_source) {
11561     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
11562   } else {
11563     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
11564     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
11565   }
11566   CGF.EmitRuntimeCall(RTLFn, Args);
11567 }
11568 
11569 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11570                                llvm::FunctionCallee Callee,
11571                                ArrayRef<llvm::Value *> Args) const {
11572   assert(Loc.isValid() && "Outlined function call location must be valid.");
11573   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11574 
11575   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11576     if (Fn->doesNotThrow()) {
11577       CGF.EmitNounwindRuntimeCall(Fn, Args);
11578       return;
11579     }
11580   }
11581   CGF.EmitRuntimeCall(Callee, Args);
11582 }
11583 
/// Emits a call to the outlined function \p OutlinedFn with arguments \p Args
/// at location \p Loc. This implementation forwards all arguments unchanged to
/// emitCall().
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
11589 
11590 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11591   if (const auto *FD = dyn_cast<FunctionDecl>(D))
11592     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11593       HasEmittedDeclareTargetRegion = true;
11594 }
11595 
/// Returns the address of the parameter \p NativeParam as a local variable of
/// \p CGF. This implementation does not use \p TargetParam.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
11601 
11602 namespace {
11603 /// Cleanup action for allocate support.
11604 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
11605 public:
11606   static const int CleanupArgs = 3;
11607 
11608 private:
11609   llvm::FunctionCallee RTLFn;
11610   llvm::Value *Args[CleanupArgs];
11611 
11612 public:
11613   OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
11614                        ArrayRef<llvm::Value *> CallArgs)
11615       : RTLFn(RTLFn) {
11616     assert(CallArgs.size() == CleanupArgs &&
11617            "Size of arguments does not match.");
11618     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11619   }
11620   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11621     if (!CGF.HaveInsertPoint())
11622       return;
11623     CGF.EmitRuntimeCall(RTLFn, Args);
11624   }
11625 };
11626 } // namespace
11627 
11628 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
11629                                                    const VarDecl *VD) {
11630   if (!VD)
11631     return Address::invalid();
11632   const VarDecl *CVD = VD->getCanonicalDecl();
11633   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
11634     return Address::invalid();
11635   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
11636   // Use the default allocation.
11637   if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
11638       !AA->getAllocator())
11639     return Address::invalid();
11640   llvm::Value *Size;
11641   CharUnits Align = CGM.getContext().getDeclAlign(CVD);
11642   if (CVD->getType()->isVariablyModifiedType()) {
11643     Size = CGF.getTypeSize(CVD->getType());
11644     // Align the size: ((size + align - 1) / align) * align
11645     Size = CGF.Builder.CreateNUWAdd(
11646         Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
11647     Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
11648     Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
11649   } else {
11650     CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
11651     Size = CGM.getSize(Sz.alignTo(Align));
11652   }
11653   llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
11654   assert(AA->getAllocator() &&
11655          "Expected allocator expression for non-default allocator.");
11656   llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
11657   // According to the standard, the original allocator type is a enum (integer).
11658   // Convert to pointer type, if required.
11659   if (Allocator->getType()->isIntegerTy())
11660     Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
11661   else if (Allocator->getType()->isPointerTy())
11662     Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
11663                                                                 CGM.VoidPtrTy);
11664   llvm::Value *Args[] = {ThreadID, Size, Allocator};
11665 
11666   llvm::Value *Addr =
11667       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args,
11668                           getName({CVD->getName(), ".void.addr"}));
11669   llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
11670                                                               Allocator};
11671   llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);
11672 
11673   CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11674                                                 llvm::makeArrayRef(FiniArgs));
11675   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11676       Addr,
11677       CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
11678       getName({CVD->getName(), ".addr"}));
11679   return Address(Addr, Align);
11680 }
11681 
11682 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11683     CodeGenModule &CGM, const OMPLoopDirective &S)
11684     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11685   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11686   if (!NeedToPush)
11687     return;
11688   NontemporalDeclsSet &DS =
11689       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11690   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11691     for (const Stmt *Ref : C->private_refs()) {
11692       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11693       const ValueDecl *VD;
11694       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11695         VD = DRE->getDecl();
11696       } else {
11697         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11698         assert((ME->isImplicitCXXThis() ||
11699                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11700                "Expected member of current class.");
11701         VD = ME->getMemberDecl();
11702       }
11703       DS.insert(VD);
11704     }
11705   }
11706 }
11707 
11708 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11709   if (!NeedToPush)
11710     return;
11711   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11712 }
11713 
11714 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11715   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11716 
11717   return llvm::any_of(
11718       CGM.getOpenMPRuntime().NontemporalDeclsStack,
11719       [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
11720 }
11721 
11722 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11723     const OMPExecutableDirective &S,
11724     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11725     const {
11726   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11727   // Vars in target/task regions must be excluded completely.
11728   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11729       isOpenMPTaskingDirective(S.getDirectiveKind())) {
11730     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11731     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11732     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11733     for (const CapturedStmt::Capture &Cap : CS->captures()) {
11734       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11735         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11736     }
11737   }
11738   // Exclude vars in private clauses.
11739   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11740     for (const Expr *Ref : C->varlists()) {
11741       if (!Ref->getType()->isScalarType())
11742         continue;
11743       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11744       if (!DRE)
11745         continue;
11746       NeedToCheckForLPCs.insert(DRE->getDecl());
11747     }
11748   }
11749   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11750     for (const Expr *Ref : C->varlists()) {
11751       if (!Ref->getType()->isScalarType())
11752         continue;
11753       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11754       if (!DRE)
11755         continue;
11756       NeedToCheckForLPCs.insert(DRE->getDecl());
11757     }
11758   }
11759   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11760     for (const Expr *Ref : C->varlists()) {
11761       if (!Ref->getType()->isScalarType())
11762         continue;
11763       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11764       if (!DRE)
11765         continue;
11766       NeedToCheckForLPCs.insert(DRE->getDecl());
11767     }
11768   }
11769   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11770     for (const Expr *Ref : C->varlists()) {
11771       if (!Ref->getType()->isScalarType())
11772         continue;
11773       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11774       if (!DRE)
11775         continue;
11776       NeedToCheckForLPCs.insert(DRE->getDecl());
11777     }
11778   }
11779   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11780     for (const Expr *Ref : C->varlists()) {
11781       if (!Ref->getType()->isScalarType())
11782         continue;
11783       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11784       if (!DRE)
11785         continue;
11786       NeedToCheckForLPCs.insert(DRE->getDecl());
11787     }
11788   }
11789   for (const Decl *VD : NeedToCheckForLPCs) {
11790     for (const LastprivateConditionalData &Data :
11791          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
11792       if (Data.DeclToUniqueName.count(VD) > 0) {
11793         if (!Data.Disabled)
11794           NeedToAddForLPCsAsDisabled.insert(VD);
11795         break;
11796       }
11797     }
11798   }
11799 }
11800 
11801 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11802     CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
11803     : CGM(CGF.CGM),
11804       Action((CGM.getLangOpts().OpenMP >= 50 &&
11805               llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
11806                            [](const OMPLastprivateClause *C) {
11807                              return C->getKind() ==
11808                                     OMPC_LASTPRIVATE_conditional;
11809                            }))
11810                  ? ActionToDo::PushAsLastprivateConditional
11811                  : ActionToDo::DoNotPush) {
11812   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11813   if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
11814     return;
11815   assert(Action == ActionToDo::PushAsLastprivateConditional &&
11816          "Expected a push action.");
11817   LastprivateConditionalData &Data =
11818       CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11819   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11820     if (C->getKind() != OMPC_LASTPRIVATE_conditional)
11821       continue;
11822 
11823     for (const Expr *Ref : C->varlists()) {
11824       Data.DeclToUniqueName.insert(std::make_pair(
11825           cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
11826           SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
11827     }
11828   }
11829   Data.IVLVal = IVLVal;
11830   Data.Fn = CGF.CurFn;
11831 }
11832 
11833 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11834     CodeGenFunction &CGF, const OMPExecutableDirective &S)
11835     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
11836   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11837   if (CGM.getLangOpts().OpenMP < 50)
11838     return;
11839   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
11840   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
11841   if (!NeedToAddForLPCsAsDisabled.empty()) {
11842     Action = ActionToDo::DisableLastprivateConditional;
11843     LastprivateConditionalData &Data =
11844         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11845     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
11846       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
11847     Data.Fn = CGF.CurFn;
11848     Data.Disabled = true;
11849   }
11850 }
11851 
/// Creates an RAII object through the two-argument constructor, i.e. the one
/// that may push a disabled entry onto the lastprivate conditional stack for
/// directive \p S.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
11857 
11858 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
11859   if (CGM.getLangOpts().OpenMP < 50)
11860     return;
11861   if (Action == ActionToDo::DisableLastprivateConditional) {
11862     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11863            "Expected list of disabled private vars.");
11864     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11865   }
11866   if (Action == ActionToDo::PushAsLastprivateConditional) {
11867     assert(
11868         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11869         "Expected list of lastprivate conditional vars.");
11870     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11871   }
11872 }
11873 
11874 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
11875                                                         const VarDecl *VD) {
11876   ASTContext &C = CGM.getContext();
11877   auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
11878   if (I == LastprivateConditionalToTypes.end())
11879     I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
11880   QualType NewType;
11881   const FieldDecl *VDField;
11882   const FieldDecl *FiredField;
11883   LValue BaseLVal;
11884   auto VI = I->getSecond().find(VD);
11885   if (VI == I->getSecond().end()) {
11886     RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
11887     RD->startDefinition();
11888     VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
11889     FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
11890     RD->completeDefinition();
11891     NewType = C.getRecordType(RD);
11892     Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
11893     BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
11894     I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
11895   } else {
11896     NewType = std::get<0>(VI->getSecond());
11897     VDField = std::get<1>(VI->getSecond());
11898     FiredField = std::get<2>(VI->getSecond());
11899     BaseLVal = std::get<3>(VI->getSecond());
11900   }
11901   LValue FiredLVal =
11902       CGF.EmitLValueForField(BaseLVal, FiredField);
11903   CGF.EmitStoreOfScalar(
11904       llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
11905       FiredLVal);
11906   return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
11907 }
11908 
11909 namespace {
11910 /// Checks if the lastprivate conditional variable is referenced in LHS.
11911 class LastprivateConditionalRefChecker final
11912     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
11913   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
11914   const Expr *FoundE = nullptr;
11915   const Decl *FoundD = nullptr;
11916   StringRef UniqueDeclName;
11917   LValue IVLVal;
11918   llvm::Function *FoundFn = nullptr;
11919   SourceLocation Loc;
11920 
11921 public:
11922   bool VisitDeclRefExpr(const DeclRefExpr *E) {
11923     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11924          llvm::reverse(LPM)) {
11925       auto It = D.DeclToUniqueName.find(E->getDecl());
11926       if (It == D.DeclToUniqueName.end())
11927         continue;
11928       if (D.Disabled)
11929         return false;
11930       FoundE = E;
11931       FoundD = E->getDecl()->getCanonicalDecl();
11932       UniqueDeclName = It->second;
11933       IVLVal = D.IVLVal;
11934       FoundFn = D.Fn;
11935       break;
11936     }
11937     return FoundE == E;
11938   }
11939   bool VisitMemberExpr(const MemberExpr *E) {
11940     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
11941       return false;
11942     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11943          llvm::reverse(LPM)) {
11944       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
11945       if (It == D.DeclToUniqueName.end())
11946         continue;
11947       if (D.Disabled)
11948         return false;
11949       FoundE = E;
11950       FoundD = E->getMemberDecl()->getCanonicalDecl();
11951       UniqueDeclName = It->second;
11952       IVLVal = D.IVLVal;
11953       FoundFn = D.Fn;
11954       break;
11955     }
11956     return FoundE == E;
11957   }
11958   bool VisitStmt(const Stmt *S) {
11959     for (const Stmt *Child : S->children()) {
11960       if (!Child)
11961         continue;
11962       if (const auto *E = dyn_cast<Expr>(Child))
11963         if (!E->isGLValue())
11964           continue;
11965       if (Visit(Child))
11966         return true;
11967     }
11968     return false;
11969   }
11970   explicit LastprivateConditionalRefChecker(
11971       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
11972       : LPM(LPM) {}
11973   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
11974   getFoundData() const {
11975     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
11976   }
11977 };
11978 } // namespace
11979 
11980 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
11981                                                        LValue IVLVal,
11982                                                        StringRef UniqueDeclName,
11983                                                        LValue LVal,
11984                                                        SourceLocation Loc) {
11985   // Last updated loop counter for the lastprivate conditional var.
11986   // int<xx> last_iv = 0;
11987   llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
11988   llvm::Constant *LastIV =
11989       getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
11990   cast<llvm::GlobalVariable>(LastIV)->setAlignment(
11991       IVLVal.getAlignment().getAsAlign());
11992   LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());
11993 
11994   // Last value of the lastprivate conditional.
11995   // decltype(priv_a) last_a;
11996   llvm::Constant *Last = getOrCreateInternalVariable(
11997       CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
11998   cast<llvm::GlobalVariable>(Last)->setAlignment(
11999       LVal.getAlignment().getAsAlign());
12000   LValue LastLVal =
12001       CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());
12002 
12003   // Global loop counter. Required to handle inner parallel-for regions.
12004   // iv
12005   llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
12006 
12007   // #pragma omp critical(a)
12008   // if (last_iv <= iv) {
12009   //   last_iv = iv;
12010   //   last_a = priv_a;
12011   // }
12012   auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
12013                     Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
12014     Action.Enter(CGF);
12015     llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
12016     // (last_iv <= iv) ? Check if the variable is updated and store new
12017     // value in global var.
12018     llvm::Value *CmpRes;
12019     if (IVLVal.getType()->isSignedIntegerType()) {
12020       CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
12021     } else {
12022       assert(IVLVal.getType()->isUnsignedIntegerType() &&
12023              "Loop iteration variable must be integer.");
12024       CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
12025     }
12026     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
12027     llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
12028     CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
12029     // {
12030     CGF.EmitBlock(ThenBB);
12031 
12032     //   last_iv = iv;
12033     CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
12034 
12035     //   last_a = priv_a;
12036     switch (CGF.getEvaluationKind(LVal.getType())) {
12037     case TEK_Scalar: {
12038       llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
12039       CGF.EmitStoreOfScalar(PrivVal, LastLVal);
12040       break;
12041     }
12042     case TEK_Complex: {
12043       CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
12044       CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
12045       break;
12046     }
12047     case TEK_Aggregate:
12048       llvm_unreachable(
12049           "Aggregates are not supported in lastprivate conditional.");
12050     }
12051     // }
12052     CGF.EmitBranch(ExitBB);
12053     // There is no need to emit line number for unconditional branch.
12054     (void)ApplyDebugLocation::CreateEmpty(CGF);
12055     CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
12056   };
12057 
12058   if (CGM.getLangOpts().OpenMPSimd) {
12059     // Do not emit as a critical region as no parallel region could be emitted.
12060     RegionCodeGenTy ThenRCG(CodeGen);
12061     ThenRCG(CGF);
12062   } else {
12063     emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
12064   }
12065 }
12066 
12067 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12068                                                          const Expr *LHS) {
12069   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12070     return;
12071   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12072   if (!Checker.Visit(LHS))
12073     return;
12074   const Expr *FoundE;
12075   const Decl *FoundD;
12076   StringRef UniqueDeclName;
12077   LValue IVLVal;
12078   llvm::Function *FoundFn;
12079   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12080       Checker.getFoundData();
12081   if (FoundFn != CGF.CurFn) {
12082     // Special codegen for inner parallel regions.
12083     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12084     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12085     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12086            "Lastprivate conditional is not found in outer region.");
12087     QualType StructTy = std::get<0>(It->getSecond());
12088     const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
12089     LValue PrivLVal = CGF.EmitLValue(FoundE);
12090     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12091         PrivLVal.getAddress(CGF),
12092         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
12093     LValue BaseLVal =
12094         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12095     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12096     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12097                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12098                         FiredLVal, llvm::AtomicOrdering::Unordered,
12099                         /*IsVolatile=*/true, /*isInit=*/false);
12100     return;
12101   }
12102 
12103   // Private address of the lastprivate conditional in the current context.
12104   // priv_a
12105   LValue LVal = CGF.EmitLValue(FoundE);
12106   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12107                                    FoundE->getExprLoc());
12108 }
12109 
12110 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12111     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12112     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12113   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12114     return;
12115   auto Range = llvm::reverse(LastprivateConditionalStack);
12116   auto It = llvm::find_if(
12117       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12118   if (It == Range.end() || It->Fn != CGF.CurFn)
12119     return;
12120   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12121   assert(LPCI != LastprivateConditionalToTypes.end() &&
12122          "Lastprivates must be registered already.");
12123   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12124   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12125   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12126   for (const auto &Pair : It->DeclToUniqueName) {
12127     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12128     if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
12129       continue;
12130     auto I = LPCI->getSecond().find(Pair.first);
12131     assert(I != LPCI->getSecond().end() &&
12132            "Lastprivate must be rehistered already.");
12133     // bool Cmp = priv_a.Fired != 0;
12134     LValue BaseLVal = std::get<3>(I->getSecond());
12135     LValue FiredLVal =
12136         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12137     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12138     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12139     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12140     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12141     // if (Cmp) {
12142     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12143     CGF.EmitBlock(ThenBB);
12144     Address Addr = CGF.GetAddrOfLocalVar(VD);
12145     LValue LVal;
12146     if (VD->getType()->isReferenceType())
12147       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12148                                            AlignmentSource::Decl);
12149     else
12150       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12151                                 AlignmentSource::Decl);
12152     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12153                                      D.getBeginLoc());
12154     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12155     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12156     // }
12157   }
12158 }
12159 
12160 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12161     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12162     SourceLocation Loc) {
12163   if (CGF.getLangOpts().OpenMP < 50)
12164     return;
12165   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12166   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12167          "Unknown lastprivate conditional variable.");
12168   StringRef UniqueName = It->second;
12169   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12170   // The variable was not updated in the region - exit.
12171   if (!GV)
12172     return;
12173   LValue LPLVal = CGF.MakeAddrLValue(
12174       GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
12175   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12176   CGF.EmitStoreOfScalar(Res, PrivLVal);
12177 }
12178 
/// SIMD-only runtime: emitParallelOutlinedFunction is not supported and is
/// unconditionally marked unreachable.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12184 
/// SIMD-only runtime: emitTeamsOutlinedFunction is not supported and is
/// unconditionally marked unreachable.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12190 
/// SIMD-only runtime: emitTaskOutlinedFunction is not supported and is
/// unconditionally marked unreachable. \p NumberOfParts is never written.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12198 
/// SIMD-only runtime: emitParallelCall is not supported and is
/// unconditionally marked unreachable.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12206 
/// SIMD-only runtime: emitCriticalRegion is not supported and is
/// unconditionally marked unreachable; \p CriticalOpGen is never invoked.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12213 
/// SIMD-only runtime: emitMasterRegion is not supported and is
/// unconditionally marked unreachable; \p MasterOpGen is never invoked.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12219 
/// SIMD-only runtime: emitTaskyieldCall is not supported and is
/// unconditionally marked unreachable.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12224 
/// SIMD-only runtime: emitTaskgroupRegion is not supported and is
/// unconditionally marked unreachable; \p TaskgroupOpGen is never invoked.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12230 
/// SIMD-only runtime: emitSingleRegion is not supported and is
/// unconditionally marked unreachable; \p SingleOpGen is never invoked.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12238 
/// SIMD-only runtime: emitOrderedRegion is not supported and is
/// unconditionally marked unreachable; \p OrderedOpGen is never invoked.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12245 
/// SIMD-only runtime: emitBarrierCall is not supported and is
/// unconditionally marked unreachable.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12253 
/// SIMD-only runtime: emitForDispatchInit is not supported and is
/// unconditionally marked unreachable.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12260 
/// SIMD-only runtime: emitForStaticInit is not supported and is
/// unconditionally marked unreachable.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12266 
/// SIMD-only runtime: emitDistributeStaticInit is not supported and is
/// unconditionally marked unreachable.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12272 
/// SIMD-only runtime: emitForOrderedIterationEnd is not supported and is
/// unconditionally marked unreachable.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12279 
/// SIMD-only runtime: emitForStaticFinish is not supported and is
/// unconditionally marked unreachable.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12285 
/// SIMD-only runtime: emitForNext is not supported and is unconditionally
/// marked unreachable; no value is ever returned.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12293 
/// SIMD-only runtime: emitNumThreadsClause is not supported and is
/// unconditionally marked unreachable.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12299 
/// SIMD-only runtime: emitProcBindClause is not supported and is
/// unconditionally marked unreachable.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12305 
/// SIMD-only runtime: getAddrOfThreadPrivate is not supported and is
/// unconditionally marked unreachable; no address is ever returned.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12312 
/// SIMD-only runtime: emitThreadPrivateVarDefinition is not supported and is
/// unconditionally marked unreachable.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12318 
/// SIMD-only runtime: getAddrOfArtificialThreadPrivate is not supported and
/// is unconditionally marked unreachable.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12323 
/// SIMD-only runtime: emitFlush is not supported and is unconditionally
/// marked unreachable.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12330 
/// SIMD-only runtime: emitTaskCall is not supported and is unconditionally
/// marked unreachable.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12339 
/// SIMD-only runtime: emitTaskLoopCall is not supported and is
/// unconditionally marked unreachable.
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12346 
/// SIMD-only runtime: asserts that \p Options requests a simple reduction and
/// then forwards all arguments unchanged to CGOpenMPRuntime::emitReduction.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  // Only Options.SimpleReduction is expected here; anything else is a bug in
  // the caller.
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
12355 
/// SIMD-only runtime: emitTaskReductionInit is not supported and is
/// unconditionally marked unreachable; no value is ever returned.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12361 
/// SIMD-only runtime: emitTaskReductionFixups is not supported and is
/// unconditionally marked unreachable.
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12368 
/// SIMD-only runtime: getTaskReductionItem is not supported and is
/// unconditionally marked unreachable; no address is ever returned.
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12375 
/// SIMD-only runtime: emitTaskwaitCall is not supported and is
/// unconditionally marked unreachable.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12380 
/// SIMD-only runtime: emitCancellationPointCall is not supported and is
/// unconditionally marked unreachable.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12386 
/// SIMD-only runtime: emitCancelCall is not supported and is unconditionally
/// marked unreachable.
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12392 
/// SIMD-only runtime: emitTargetOutlinedFunction is not supported and is
/// unconditionally marked unreachable; the \p OutlinedFn and \p OutlinedFnID
/// reference parameters are never written.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12399 
/// SIMD-only runtime: emitTargetCall is not supported and is unconditionally
/// marked unreachable; \p SizeEmitter is never invoked.
void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12409 
/// SIMD-only runtime: emitTargetFunctions is not supported and is
/// unconditionally marked unreachable; no value is ever returned.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12413 
/// SIMD-only runtime: emitTargetGlobalVariable is not supported and is
/// unconditionally marked unreachable; no value is ever returned.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12417 
/// SIMD-only runtime: ignores \p GD and always returns false. Unlike the
/// neighbouring SIMD-only entry points, this one may be called.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
12421 
/// SIMD-only runtime: emitTeamsCall is not supported and is unconditionally
/// marked unreachable.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12429 
/// SIMD-only runtime: emitNumTeamsClause is not supported and is
/// unconditionally marked unreachable.
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12436 
/// SIMD-only runtime: emitTargetDataCalls is not supported and is
/// unconditionally marked unreachable; \p CodeGen is never invoked and
/// \p Info is never modified.
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12442 
/// SIMD-only runtime: emitTargetDataStandAloneCall is not supported and is
/// unconditionally marked unreachable.
void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12448 
/// SIMD-only runtime: emitDoacrossInit is not supported and is
/// unconditionally marked unreachable.
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12454 
/// SIMD-only runtime: emitDoacrossOrdered is not supported and is
/// unconditionally marked unreachable.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12459 
/// SIMD-only runtime: translateParameter is not supported and is
/// unconditionally marked unreachable; no declaration is ever returned.
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12465 
/// SIMD-only runtime: getParameterAddress is not supported and is
/// unconditionally marked unreachable; no address is ever returned.
Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12472