1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/Attr.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/OpenMPClause.h"
21 #include "clang/AST/StmtOpenMP.h"
22 #include "clang/AST/StmtVisitor.h"
23 #include "clang/Basic/BitmaskEnum.h"
24 #include "clang/Basic/FileManager.h"
25 #include "clang/Basic/OpenMPKinds.h"
26 #include "clang/Basic/SourceManager.h"
27 #include "clang/CodeGen/ConstantInitBuilder.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/SetOperations.h"
30 #include "llvm/ADT/StringExtras.h"
31 #include "llvm/Bitcode/BitcodeReader.h"
32 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
33 #include "llvm/IR/Constants.h"
34 #include "llvm/IR/DerivedTypes.h"
35 #include "llvm/IR/GlobalValue.h"
36 #include "llvm/IR/Value.h"
37 #include "llvm/Support/AtomicOrdering.h"
38 #include "llvm/Support/Format.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include <cassert>
41 #include <numeric>
42 
43 using namespace clang;
44 using namespace CodeGen;
45 using namespace llvm::omp;
46 
47 namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Constructor for regions that have an associated captured statement.
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Constructor for regions without a captured statement (e.g. inlined
  /// regions that reuse the enclosing capture).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit a task switching point for untied tasks; no-op by default,
  /// overridden by task-region infos.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// Return the kind of this OpenMP region (outlined/task/inlined/target).
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// Return the OpenMP directive this region was created for.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// True if the associated directive may contain a 'cancel' construct.
  bool hasCancel() const { return HasCancel; }

  /// LLVM-style RTTI: any captured-statement info tagged CR_OpenMP is a
  /// CGOpenMPRegionInfo (or subclass).
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  /// Discriminator consulted by the derived classes' classof().
  CGOpenMPRegionKind RegionKind;
  /// Callback that emits the body of the region.
  RegionCodeGenTy CodeGen;
  /// Directive kind the region belongs to.
  OpenMPDirectiveKind Kind;
  /// Whether a 'cancel' construct may appear inside the region.
  bool HasCancel;
};
109 
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// LLVM-style RTTI: an OpenMP region info of kind ParallelOutlinedRegion.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name for the outlined helper function; non-owning StringRef, so the
  /// referenced storage must outlive this object.
  StringRef HelperName;
};
142 
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action that emits the switching logic needed for 'untied'
  /// tasks: the task body is split into numbered parts and a switch over a
  /// part-id counter resumes execution at the correct part.
  class UntiedTaskActionTy final : public PrePostActionTy {
    /// True when the task is untied (constructor receives 'Tied').
    bool Untied;
    /// Variable holding a pointer to the task part id.
    const VarDecl *PartIDVar;
    /// Extra codegen executed at every task switching point.
    const RegionCodeGenTy UntiedCodeGen;
    /// Dispatch switch built in Enter(); cases are appended as new
    /// switching points are emitted.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    /// On entering an untied task: load the current part id, build the
    /// dispatch switch over it (unknown ids branch to a 'done' block that
    /// exits through the cleanups), and register case 0 to resume right
    /// after the switch.
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit a task switching point: store the next part number into the
    /// part-id variable, run the extra untied codegen, leave the function
    /// through the cleanups, and add a switch case that resumes execution
    /// right after this point.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of task parts emitted so far (== number of switch cases).
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  /// Forward to the stored untied-task action.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  /// LLVM-style RTTI: an OpenMP region info of kind TaskOutlinedRegion.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
231 
/// API for inlined captured statement code generation in OpenMP
/// constructs. Most queries forward to the enclosing region info, if any.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  /// NOTE: unlike the other forwarders, this checks the raw OldCSI rather
  /// than OuterRegionInfo, so an enclosing non-OpenMP captured region can
  /// also supply the helper name.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  /// Return the captured-statement info that was active before this region.
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  /// LLVM-style RTTI: an OpenMP region info of kind InlinedRegion.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to an OpenMP region info, or null when the enclosing
  /// captured region is not an OpenMP one.
  CGOpenMPRegionInfo *OuterRegionInfo;
};
314 
/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of
/// the captured fields. The name of the target region has to be unique in a
/// given application, so it is provided by the client, because only the
/// client has the information needed to generate it.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// LLVM-style RTTI: an OpenMP region info of kind TargetRegion.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Client-provided unique name of the target region (non-owning).
  StringRef HelperName;
};
343 
/// Placeholder region-codegen callback for regions that must never emit a
/// body (used when only captured expressions are of interest).
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
347 /// API for generation of expressions captured in a innermost OpenMP
348 /// region.
349 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
350 public:
351   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
352       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
353                                   OMPD_unknown,
354                                   /*HasCancel=*/false),
355         PrivScope(CGF) {
356     // Make sure the globals captured in the provided statement are local by
357     // using the privatization logic. We assume the same variable is not
358     // captured more than once.
359     for (const auto &C : CS.captures()) {
360       if (!C.capturesVariable() && !C.capturesVariableByCopy())
361         continue;
362 
363       const VarDecl *VD = C.getCapturedVar();
364       if (VD->isLocalVarDeclOrParm())
365         continue;
366 
367       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
368                       /*RefersToEnclosingVariableOrCapture=*/false,
369                       VD->getType().getNonReferenceType(), VK_LValue,
370                       C.getLocation());
371       PrivScope.addPrivate(
372           VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
373     }
374     (void)PrivScope.Privatize();
375   }
376 
377   /// Lookup the captured field decl for a variable.
378   const FieldDecl *lookup(const VarDecl *VD) const override {
379     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
380       return FD;
381     return nullptr;
382   }
383 
384   /// Emit the captured statement body.
385   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
386     llvm_unreachable("No body for expressions");
387   }
388 
389   /// Get a variable or parameter for storing global thread id
390   /// inside OpenMP construct.
391   const VarDecl *getThreadIDVariable() const override {
392     llvm_unreachable("No thread id for expressions");
393   }
394 
395   /// Get the name of the capture helper.
396   StringRef getHelperName() const override {
397     llvm_unreachable("No helper name for expressions");
398   }
399 
400   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
401 
402 private:
403   /// Private scope to capture global variables.
404   CodeGenFunction::OMPPrivateScope PrivScope;
405 };
406 
407 /// RAII for emitting code of OpenMP constructs.
408 class InlinedOpenMPRegionRAII {
409   CodeGenFunction &CGF;
410   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
411   FieldDecl *LambdaThisCaptureField = nullptr;
412   const CodeGen::CGBlockInfo *BlockInfo = nullptr;
413 
414 public:
415   /// Constructs region for combined constructs.
416   /// \param CodeGen Code generation sequence for combined directives. Includes
417   /// a list of functions used for code generation of implicitly inlined
418   /// regions.
419   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
420                           OpenMPDirectiveKind Kind, bool HasCancel)
421       : CGF(CGF) {
422     // Start emission for the construct.
423     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
424         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
425     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
426     LambdaThisCaptureField = CGF.LambdaThisCaptureField;
427     CGF.LambdaThisCaptureField = nullptr;
428     BlockInfo = CGF.BlockInfo;
429     CGF.BlockInfo = nullptr;
430   }
431 
432   ~InlinedOpenMPRegionRAII() {
433     // Restore original CapturedStmtInfo only if we're done with code emission.
434     auto *OldCSI =
435         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
436     delete CGF.CapturedStmtInfo;
437     CGF.CapturedStmtInfo = OldCSI;
438     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
439     CGF.LambdaThisCaptureField = LambdaThisCaptureField;
440     CGF.BlockInfo = BlockInfo;
441   }
442 };
443 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enum elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive (same value as
  /// OMP_IDENT_BARRIER_IMPL).
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
472 
namespace {
// Enable bitwise operators (| & ^ ...) for the bitmask enums declared in
// this namespace.
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

/// Reserved device IDs understood by the offloading runtime.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
498 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
/// Field indices into the ident_t structure above.
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
539 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// These are high-order flag bits, disjoint from the schedule values above.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
571 
572 enum OpenMPRTLFunction {
573   /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
574   /// kmpc_micro microtask, ...);
575   OMPRTL__kmpc_fork_call,
576   /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
577   /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
578   OMPRTL__kmpc_threadprivate_cached,
579   /// Call to void __kmpc_threadprivate_register( ident_t *,
580   /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
581   OMPRTL__kmpc_threadprivate_register,
582   // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc);
583   OMPRTL__kmpc_global_thread_num,
584   // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
585   // kmp_critical_name *crit);
586   OMPRTL__kmpc_critical,
587   // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
588   // global_tid, kmp_critical_name *crit, uintptr_t hint);
589   OMPRTL__kmpc_critical_with_hint,
590   // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
591   // kmp_critical_name *crit);
592   OMPRTL__kmpc_end_critical,
593   // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
594   // global_tid);
595   OMPRTL__kmpc_cancel_barrier,
596   // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
597   OMPRTL__kmpc_barrier,
598   // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
599   OMPRTL__kmpc_for_static_fini,
600   // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
601   // global_tid);
602   OMPRTL__kmpc_serialized_parallel,
603   // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
604   // global_tid);
605   OMPRTL__kmpc_end_serialized_parallel,
606   // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
607   // kmp_int32 num_threads);
608   OMPRTL__kmpc_push_num_threads,
609   // Call to void __kmpc_flush(ident_t *loc);
610   OMPRTL__kmpc_flush,
611   // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
612   OMPRTL__kmpc_master,
613   // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
614   OMPRTL__kmpc_end_master,
615   // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
616   // int end_part);
617   OMPRTL__kmpc_omp_taskyield,
618   // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
619   OMPRTL__kmpc_single,
620   // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
621   OMPRTL__kmpc_end_single,
622   // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
623   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
624   // kmp_routine_entry_t *task_entry);
625   OMPRTL__kmpc_omp_task_alloc,
626   // Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *,
627   // kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
628   // size_t sizeof_shareds, kmp_routine_entry_t *task_entry,
629   // kmp_int64 device_id);
630   OMPRTL__kmpc_omp_target_task_alloc,
631   // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
632   // new_task);
633   OMPRTL__kmpc_omp_task,
634   // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
635   // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
636   // kmp_int32 didit);
637   OMPRTL__kmpc_copyprivate,
638   // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
639   // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
640   // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
641   OMPRTL__kmpc_reduce,
642   // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
643   // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
644   // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
645   // *lck);
646   OMPRTL__kmpc_reduce_nowait,
647   // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
648   // kmp_critical_name *lck);
649   OMPRTL__kmpc_end_reduce,
650   // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
651   // kmp_critical_name *lck);
652   OMPRTL__kmpc_end_reduce_nowait,
653   // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
654   // kmp_task_t * new_task);
655   OMPRTL__kmpc_omp_task_begin_if0,
656   // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
657   // kmp_task_t * new_task);
658   OMPRTL__kmpc_omp_task_complete_if0,
659   // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
660   OMPRTL__kmpc_ordered,
661   // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
662   OMPRTL__kmpc_end_ordered,
663   // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
664   // global_tid);
665   OMPRTL__kmpc_omp_taskwait,
666   // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
667   OMPRTL__kmpc_taskgroup,
668   // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
669   OMPRTL__kmpc_end_taskgroup,
670   // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
671   // int proc_bind);
672   OMPRTL__kmpc_push_proc_bind,
673   // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
674   // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
675   // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
676   OMPRTL__kmpc_omp_task_with_deps,
677   // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
678   // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
679   // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
680   OMPRTL__kmpc_omp_wait_deps,
681   // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
682   // global_tid, kmp_int32 cncl_kind);
683   OMPRTL__kmpc_cancellationpoint,
684   // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
685   // kmp_int32 cncl_kind);
686   OMPRTL__kmpc_cancel,
687   // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
688   // kmp_int32 num_teams, kmp_int32 thread_limit);
689   OMPRTL__kmpc_push_num_teams,
690   // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
691   // microtask, ...);
692   OMPRTL__kmpc_fork_teams,
693   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
694   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
695   // sched, kmp_uint64 grainsize, void *task_dup);
696   OMPRTL__kmpc_taskloop,
697   // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
698   // num_dims, struct kmp_dim *dims);
699   OMPRTL__kmpc_doacross_init,
700   // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
701   OMPRTL__kmpc_doacross_fini,
702   // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
703   // *vec);
704   OMPRTL__kmpc_doacross_post,
705   // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
706   // *vec);
707   OMPRTL__kmpc_doacross_wait,
708   // Call to void *__kmpc_taskred_init(int gtid, int num_data, void *data);
709   OMPRTL__kmpc_taskred_init,
710   // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
711   // *d);
712   OMPRTL__kmpc_task_reduction_get_th_data,
713   // Call to void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
714   // is_ws, int num, void *data);
715   OMPRTL__kmpc_taskred_modifier_init,
716   // Call to void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid,
717   // int is_ws);
718   OMPRTL__kmpc_task_reduction_modifier_fini,
719   // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
720   OMPRTL__kmpc_alloc,
721   // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
722   OMPRTL__kmpc_free,
723 
724   //
725   // Offloading related calls
726   //
727   // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
728   // size);
729   OMPRTL__kmpc_push_target_tripcount,
730   // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
731   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
732   // *arg_types);
733   OMPRTL__tgt_target,
734   // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
735   // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
736   // *arg_types);
737   OMPRTL__tgt_target_nowait,
738   // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
739   // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
740   // *arg_types, int32_t num_teams, int32_t thread_limit);
741   OMPRTL__tgt_target_teams,
742   // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
743   // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
744   // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
745   OMPRTL__tgt_target_teams_nowait,
746   // Call to void __tgt_register_requires(int64_t flags);
747   OMPRTL__tgt_register_requires,
748   // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
749   // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
750   OMPRTL__tgt_target_data_begin,
751   // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
752   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
753   // *arg_types);
754   OMPRTL__tgt_target_data_begin_nowait,
  // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
757   OMPRTL__tgt_target_data_end,
758   // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
759   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
760   // *arg_types);
761   OMPRTL__tgt_target_data_end_nowait,
762   // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
763   // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
764   OMPRTL__tgt_target_data_update,
765   // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
766   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
767   // *arg_types);
768   OMPRTL__tgt_target_data_update_nowait,
769   // Call to int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
770   OMPRTL__tgt_mapper_num_components,
771   // Call to void __tgt_push_mapper_component(void *rt_mapper_handle, void
772   // *base, void *begin, int64_t size, int64_t type);
773   OMPRTL__tgt_push_mapper_component,
774   // Call to kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
775   // int gtid, kmp_task_t *task);
776   OMPRTL__kmpc_task_allow_completion_event,
777 };
778 
779 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
780 /// region.
781 class CleanupTy final : public EHScopeStack::Cleanup {
782   PrePostActionTy *Action;
783 
784 public:
785   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
786   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
787     if (!CGF.HaveInsertPoint())
788       return;
789     Action->Exit(CGF);
790   }
791 };
792 
793 } // anonymous namespace
794 
795 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
796   CodeGenFunction::RunCleanupsScope Scope(CGF);
797   if (PrePostAction) {
798     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
799     Callback(CodeGen, CGF, *PrePostAction);
800   } else {
801     PrePostActionTy Action;
802     Callback(CodeGen, CGF, Action);
803   }
804 }
805 
806 /// Check if the combiner is a call to UDR combiner and if it is so return the
807 /// UDR decl used for reduction.
808 static const OMPDeclareReductionDecl *
809 getReductionInit(const Expr *ReductionOp) {
810   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
811     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
812       if (const auto *DRE =
813               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
814         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
815           return DRD;
816   return nullptr;
817 }
818 
/// Emit the initialization of a reduction private copy at \p Private.
///
/// When \p DRD carries an initializer clause, \p InitOp (a call through an
/// opaque callee) is emitted with its two argument variables redirected to
/// \p Private and \p Original respectively and the callee replaced by the
/// UDR's initializer function. Otherwise the private copy is filled from a
/// zero-initialized constant global of type \p Ty.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // Get (emitting on demand) the {combiner, initializer} function pair for
    // this UDR; only the initializer (.second) is used here.
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    // The call's two arguments are address-of expressions over variables;
    // dig out the referenced declarations.
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Remap the first argument's variable to the private copy and the second
    // to the original storage before emitting the call.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // Substitute the UDR initializer function for the opaque callee, then
    // emit the call for its side effects only.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No initializer clause: build a private constant global holding the
    // zero value of Ty and copy it into the private storage.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    // Load the constant in the form matching Ty's evaluation kind.
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress(CGF));
      break;
    }
    // Route the loaded value through an opaque expression so it can be
    // stored with EmitAnyExprToMem.
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
870 
871 /// Emit initialization of arrays of complex types.
872 /// \param DestAddr Address of the array.
873 /// \param Type Type of array.
874 /// \param Init Initial expression of array.
875 /// \param SrcAddr Address of the original array.
/// Emit element-by-element initialization of an array.
/// \param CGF Current codegen function.
/// \param DestAddr Address of the array being initialized.
/// \param Type Type of the array.
/// \param EmitDeclareReductionInit If true, each element is initialized via
///        emitInitWithReductionInitializer using \p Init and \p DRD.
/// \param Init Initializer expression evaluated once per element.
/// \param DRD Declare-reduction declaration, or null. When non-null, the
///        source array at \p SrcAddr is traversed in lockstep with the
///        destination.
/// \param SrcAddr Address of the original array (used only when DRD != null).
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely when the array has zero elements.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes carry the current source/destination element pointers across
  // loop iterations.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Scope the per-element init so any temporaries it creates are cleaned
    // up before advancing to the next element.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the IR value name says "dest" but this advances the
    // *source* pointer — looks like a copy-paste of the name used below.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
959 
/// Emit the lvalue for a shared (original) reduction variable expression.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}
963 
964 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
965                                             const Expr *E) {
966   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
967     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
968   return LValue();
969 }
970 
/// Emit initialization for an array-typed reduction private copy at
/// \p PrivateAddr, with \p SharedLVal as the original storage.
void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  // Use the UDR path when the DRD has an explicit initializer, or when the
  // private copy has no initializer of its own; otherwise initialize each
  // element from the private VarDecl's initializer.
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedLVal.getAddress(CGF));
}
987 
988 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
989                                    ArrayRef<const Expr *> Origs,
990                                    ArrayRef<const Expr *> Privates,
991                                    ArrayRef<const Expr *> ReductionOps) {
992   ClausesData.reserve(Shareds.size());
993   SharedAddresses.reserve(Shareds.size());
994   Sizes.reserve(Shareds.size());
995   BaseDecls.reserve(Shareds.size());
996   const auto *IOrig = Origs.begin();
997   const auto *IPriv = Privates.begin();
998   const auto *IRed = ReductionOps.begin();
999   for (const Expr *Ref : Shareds) {
1000     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
1001     std::advance(IOrig, 1);
1002     std::advance(IPriv, 1);
1003     std::advance(IRed, 1);
1004   }
1005 }
1006 
1007 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
1008   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
1009          "Number of generated lvalues must be exactly N.");
1010   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
1011   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
1012   SharedAddresses.emplace_back(First, Second);
1013   if (ClausesData[N].Shared == ClausesData[N].Ref) {
1014     OrigAddresses.emplace_back(First, Second);
1015   } else {
1016     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
1017     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
1018     OrigAddresses.emplace_back(First, Second);
1019   }
1020 }
1021 
/// Compute and record the (size-in-chars, number-of-elements) pair for the
/// N-th reduction item. For non-variably-modified types only the constant
/// byte size is recorded; for VLA types the dynamic size is computed (from
/// the array-section bounds when the item is a section) and bound to the VLA
/// size expression so the private type can be emitted.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-sized item: byte size only, no element count needed.
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count = (UB - LB) + 1; bytes = count * sizeof(element).
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Not a section: take the full type size and derive the element count.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the computed element count to the VLA size expression while the
  // variably modified private type is emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
1058 
/// Re-emit the N-th private copy's variably modified type using an
/// externally supplied element count \p Size. No-op for items whose private
/// type is not variably modified (in which case \p Size must be null).
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // Bind Size to the VLA size expression while the type is emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
1077 
/// Emit initialization of the N-th reduction private copy at \p PrivateAddr.
///
/// Dispatches to aggregate initialization for array types, to the UDR
/// initializer when applicable, and otherwise to \p DefaultInit or the
/// private VarDecl's own (non-trivial) initializer.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Recast both addresses to the memory representation of their respective
  // types before initializing.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Arrays are initialized element by element.
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Use the UDR initializer (or its zero-init fallback).
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // Default initialization declined: fall back to the private VarDecl's
    // own initializer expression.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
1108 
1109 bool ReductionCodeGen::needCleanups(unsigned N) {
1110   const auto *PrivateVD =
1111       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1112   QualType PrivateType = PrivateVD->getType();
1113   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1114   return DTorKind != QualType::DK_none;
1115 }
1116 
1117 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
1118                                     Address PrivateAddr) {
1119   const auto *PrivateVD =
1120       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1121   QualType PrivateType = PrivateVD->getType();
1122   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1123   if (needCleanups(N)) {
1124     PrivateAddr = CGF.Builder.CreateElementBitCast(
1125         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1126     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
1127   }
1128 }
1129 
/// Peel pointer/reference layers off \p BaseTy, loading through \p BaseLV at
/// each step, until the type matches \p ElTy; the final lvalue is bitcast to
/// ElTy's memory representation while keeping the original base/TBAA info.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // Load one level of indirection: pointers and references need different
    // load helpers.
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
1149 
/// Wrap \p Addr in a chain of pointer temporaries mirroring the
/// pointer/reference structure of \p BaseTy (down to \p ElTy), so the result
/// can be accessed through the same number of indirections as the original
/// base. If no indirection layers exist, \p Addr is simply cast to
/// \p BaseLVType at \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  // Tmp: innermost temporary; TopTmp: previous temporary to link into;
  // MostTopTmp: outermost temporary, i.e. the value to return.
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // One stack temporary per indirection layer; each outer temporary stores
    // the address of the next inner one.
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    // Store the adjusted address into the innermost temporary and return the
    // outermost one.
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
1177 
1178 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
1179   const VarDecl *OrigVD = nullptr;
1180   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
1181     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
1182     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
1183       Base = TempOASE->getBase()->IgnoreParenImpCasts();
1184     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1185       Base = TempASE->getBase()->IgnoreParenImpCasts();
1186     DE = cast<DeclRefExpr>(Base);
1187     OrigVD = cast<VarDecl>(DE->getDecl());
1188   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
1189     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
1190     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1191       Base = TempASE->getBase()->IgnoreParenImpCasts();
1192     DE = cast<DeclRefExpr>(Base);
1193     OrigVD = cast<VarDecl>(DE->getDecl());
1194   }
1195   return OrigVD;
1196 }
1197 
1198 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
1199                                                Address PrivateAddr) {
1200   const DeclRefExpr *DE;
1201   if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
1202     BaseDecls.emplace_back(OrigVD);
1203     LValue OriginalBaseLValue = CGF.EmitLValue(DE);
1204     LValue BaseLValue =
1205         loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1206                     OriginalBaseLValue);
1207     llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1208         BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
1209     llvm::Value *PrivatePointer =
1210         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1211             PrivateAddr.getPointer(),
1212             SharedAddresses[N].first.getAddress(CGF).getType());
1213     llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
1214     return castToBase(CGF, OrigVD->getType(),
1215                       SharedAddresses[N].first.getType(),
1216                       OriginalBaseLValue.getAddress(CGF).getType(),
1217                       OriginalBaseLValue.getAlignment(), Ptr);
1218   }
1219   BaseDecls.emplace_back(
1220       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1221   return PrivateAddr;
1222 }
1223 
1224 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1225   const OMPDeclareReductionDecl *DRD =
1226       getReductionInit(ClausesData[N].ReductionOp);
1227   return DRD && DRD->getInitializer();
1228 }
1229 
/// The thread-id variable of this region is a pointer-typed parameter, so
/// load through it to form the lvalue of the pointee.
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}
1235 
/// Emit the region's code inside a terminate scope, so exceptions cannot
/// escape the structured block.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  // Bracket the body emission with a terminate scope to enforce this.
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1248 
/// In task-outlined regions the thread-id variable is a local value (not a
/// pointer parameter), so its lvalue is formed directly from its address
/// without an extra load.
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}
1255 
1256 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1257                                        QualType FieldTy) {
1258   auto *Field = FieldDecl::Create(
1259       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1260       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1261       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1262   Field->setAccess(AS_public);
1263   DC->addDecl(Field);
1264   return Field;
1265 }
1266 
/// Build the implicit ident_t record (field order defines its layout) and the
/// critical-name array type, then load any offloading metadata from the
/// module. The field names mirror the runtime's ident_t struct — assumed to
/// match the libomp definition; confirm against kmp.h if changing.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OffloadEntriesInfoManager(CGM) {
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("ident_t");
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  RD->startDefinition();
  // reserved_1
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // flags
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_2
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_3
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // psource
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  RD->completeDefinition();
  IdentQTy = C.getRecordType(RD);
  IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
  // Critical-section names are arrays of 8 x i32.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  loadOffloadInfoMetadata();
}
1292 
1293 void CGOpenMPRuntime::clear() {
1294   InternalVars.clear();
1295   // Clean non-target variable declarations possibly used only in debug info.
1296   for (const auto &Data : EmittedNonTargetVariables) {
1297     if (!Data.getValue().pointsToAliveValue())
1298       continue;
1299     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1300     if (!GV)
1301       continue;
1302     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1303       continue;
1304     GV->eraseFromParent();
1305   }
1306 }
1307 
1308 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1309   SmallString<128> Buffer;
1310   llvm::raw_svector_ostream OS(Buffer);
1311   StringRef Sep = FirstSeparator;
1312   for (StringRef Part : Parts) {
1313     OS << Sep << Part;
1314     Sep = Separator;
1315   }
1316   return std::string(OS.str());
1317 }
1318 
/// Emit an internal-linkage helper `void fn(Ty *omp_out, Ty *omp_in)` for a
/// declare-reduction combiner or initializer.
///
/// \param CombinerInitializer Expression to emit in the body (combiner, or a
///        call-style initializer); may be null for initializers that only
///        need the output variable's own default init.
/// \param In Variable that stands for the input operand in the expression;
///        remapped to *second parameter.
/// \param Out Variable that stands for the output operand; remapped to
///        *first parameter.
/// \param IsCombiner Selects the emitted function's name and whether \p Out
///        gets default-initialized first.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // These helpers are tiny; force-inline them when optimizing.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // For initializers, run Out's own (non-trivial) default init before the
  // initializer expression, if any.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1375 
/// Emit (once) the combiner — and, when present, the initializer — helper
/// functions for a declare-reduction directive and cache them in UDRMap.
/// When called during emission of a function (\p CGF non-null), the UDR is
/// also recorded against that function in FunctionUDRMap.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  // Already emitted for this declaration.
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // For direct (non-call) initializers the expression is handled through
    // Out's own initializer, so pass null here.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
1401 
1402 std::pair<llvm::Function *, llvm::Function *>
1403 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1404   auto I = UDRMap.find(D);
1405   if (I != UDRMap.end())
1406     return I->second;
1407   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1408   return UDRMap.lookup(D);
1409 }
1410 
1411 namespace {
1412 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1413 // Builder if one is present.
1414 struct PushAndPopStackRAII {
1415   PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
1416                       bool HasCancel)
1417       : OMPBuilder(OMPBuilder) {
1418     if (!OMPBuilder)
1419       return;
1420 
1421     // The following callback is the crucial part of clangs cleanup process.
1422     //
1423     // NOTE:
1424     // Once the OpenMPIRBuilder is used to create parallel regions (and
1425     // similar), the cancellation destination (Dest below) is determined via
1426     // IP. That means if we have variables to finalize we split the block at IP,
1427     // use the new block (=BB) as destination to build a JumpDest (via
1428     // getJumpDestInCurrentScope(BB)) which then is fed to
1429     // EmitBranchThroughCleanup. Furthermore, there will not be the need
1430     // to push & pop an FinalizationInfo object.
1431     // The FiniCB will still be needed but at the point where the
1432     // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
1433     auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
1434       assert(IP.getBlock()->end() == IP.getPoint() &&
1435              "Clang CG should cause non-terminated block!");
1436       CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1437       CGF.Builder.restoreIP(IP);
1438       CodeGenFunction::JumpDest Dest =
1439           CGF.getOMPCancelDestination(OMPD_parallel);
1440       CGF.EmitBranchThroughCleanup(Dest);
1441     };
1442 
1443     // TODO: Remove this once we emit parallel regions through the
1444     //       OpenMPIRBuilder as it can do this setup internally.
1445     llvm::OpenMPIRBuilder::FinalizationInfo FI(
1446         {FiniCB, OMPD_parallel, HasCancel});
1447     OMPBuilder->pushFinalizationCB(std::move(FI));
1448   }
1449   ~PushAndPopStackRAII() {
1450     if (OMPBuilder)
1451       OMPBuilder->popFinalizationCB();
1452   }
1453   llvm::OpenMPIRBuilder *OMPBuilder;
1454 };
1455 } // namespace
1456 
1457 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1458     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1459     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1460     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1461   assert(ThreadIDVar->getType()->isPointerType() &&
1462          "thread id variable must be of type kmp_int32 *");
1463   CodeGenFunction CGF(CGM, true);
1464   bool HasCancel = false;
1465   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1466     HasCancel = OPD->hasCancel();
1467   else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1468     HasCancel = OPD->hasCancel();
1469   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1470     HasCancel = OPSD->hasCancel();
1471   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1472     HasCancel = OPFD->hasCancel();
1473   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1474     HasCancel = OPFD->hasCancel();
1475   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1476     HasCancel = OPFD->hasCancel();
1477   else if (const auto *OPFD =
1478                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1479     HasCancel = OPFD->hasCancel();
1480   else if (const auto *OPFD =
1481                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1482     HasCancel = OPFD->hasCancel();
1483 
1484   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1485   //       parallel region to make cancellation barriers work properly.
1486   llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder();
1487   PushAndPopStackRAII PSR(OMPBuilder, CGF, HasCancel);
1488   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1489                                     HasCancel, OutlinedHelperName);
1490   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1491   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1492 }
1493 
1494 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1495     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1496     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1497   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1498   return emitParallelOrTeamsOutlinedFunction(
1499       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1500 }
1501 
1502 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1503     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1504     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1505   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1506   return emitParallelOrTeamsOutlinedFunction(
1507       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1508 }
1509 
1510 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
1511     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1512     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1513     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1514     bool Tied, unsigned &NumberOfParts) {
1515   auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1516                                               PrePostActionTy &) {
1517     llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1518     llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1519     llvm::Value *TaskArgs[] = {
1520         UpLoc, ThreadID,
1521         CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1522                                     TaskTVar->getType()->castAs<PointerType>())
1523             .getPointer(CGF)};
1524     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
1525   };
1526   CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1527                                                             UntiedCodeGen);
1528   CodeGen.setAction(Action);
1529   assert(!ThreadIDVar->getType()->isPointerType() &&
1530          "thread id variable must be of type kmp_int32 for tasks");
1531   const OpenMPDirectiveKind Region =
1532       isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1533                                                       : OMPD_task;
1534   const CapturedStmt *CS = D.getCapturedStmt(Region);
1535   bool HasCancel = false;
1536   if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
1537     HasCancel = TD->hasCancel();
1538   else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
1539     HasCancel = TD->hasCancel();
1540   else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
1541     HasCancel = TD->hasCancel();
1542   else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
1543     HasCancel = TD->hasCancel();
1544 
1545   CodeGenFunction CGF(CGM, true);
1546   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1547                                         InnermostKind, HasCancel, Action);
1548   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1549   llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
1550   if (!Tied)
1551     NumberOfParts = Action.getNumberOfParts();
1552   return Res;
1553 }
1554 
1555 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1556                              const RecordDecl *RD, const CGRecordLayout &RL,
1557                              ArrayRef<llvm::Constant *> Data) {
1558   llvm::StructType *StructTy = RL.getLLVMType();
1559   unsigned PrevIdx = 0;
1560   ConstantInitBuilder CIBuilder(CGM);
1561   auto DI = Data.begin();
1562   for (const FieldDecl *FD : RD->fields()) {
1563     unsigned Idx = RL.getLLVMFieldNo(FD);
1564     // Fill the alignment.
1565     for (unsigned I = PrevIdx; I < Idx; ++I)
1566       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1567     PrevIdx = Idx + 1;
1568     Fields.add(*DI);
1569     ++DI;
1570   }
1571 }
1572 
1573 template <class... As>
1574 static llvm::GlobalVariable *
1575 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1576                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1577                    As &&... Args) {
1578   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1579   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1580   ConstantInitBuilder CIBuilder(CGM);
1581   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1582   buildStructValue(Fields, CGM, RD, RL, Data);
1583   return Fields.finishAndCreateGlobal(
1584       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1585       std::forward<As>(Args)...);
1586 }
1587 
1588 template <typename T>
1589 static void
1590 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1591                                          ArrayRef<llvm::Constant *> Data,
1592                                          T &Parent) {
1593   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1594   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1595   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1596   buildStructValue(Fields, CGM, RD, RL, Data);
1597   Fields.finishAndAddTo(Parent);
1598 }
1599 
1600 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
1601   CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
1602   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1603   FlagsTy FlagsKey(Flags, Reserved2Flags);
1604   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
1605   if (!Entry) {
1606     if (!DefaultOpenMPPSource) {
1607       // Initialize default location for psource field of ident_t structure of
1608       // all ident_t objects. Format is ";file;function;line;column;;".
1609       // Taken from
1610       // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
1611       DefaultOpenMPPSource =
1612           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
1613       DefaultOpenMPPSource =
1614           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
1615     }
1616 
1617     llvm::Constant *Data[] = {
1618         llvm::ConstantInt::getNullValue(CGM.Int32Ty),
1619         llvm::ConstantInt::get(CGM.Int32Ty, Flags),
1620         llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
1621         llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
1622     llvm::GlobalValue *DefaultOpenMPLocation =
1623         createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
1624                            llvm::GlobalValue::PrivateLinkage);
1625     DefaultOpenMPLocation->setUnnamedAddr(
1626         llvm::GlobalValue::UnnamedAddr::Global);
1627 
1628     OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
1629   }
1630   return Address(Entry, Align);
1631 }
1632 
1633 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1634                                              bool AtCurrentPoint) {
1635   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1636   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1637 
1638   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1639   if (AtCurrentPoint) {
1640     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1641         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1642   } else {
1643     Elem.second.ServiceInsertPt =
1644         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1645     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1646   }
1647 }
1648 
1649 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1650   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1651   if (Elem.second.ServiceInsertPt) {
1652     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1653     Elem.second.ServiceInsertPt = nullptr;
1654     Ptr->eraseFromParent();
1655   }
1656 }
1657 
// Return an ident_t* describing source location \p Loc, suitable as the
// location argument of kmpc runtime calls. Without debug info a shared
// module-level default ident_t is returned; otherwise a per-function ident_t
// temporary is filled in with a ";file;function;line;column;;" string.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  // Reuse the per-function ident_t temporary if one was created earlier.
  Address LocValue = Address::invalid();
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, Align);

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // GetOpenMPThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    // Copy the default ident_t into the temporary at the service insert
    // point so the initialization precedes all uses in the function.
    if (!Elem.second.ServiceInsertPt)
      setLocThreadIdInsertPt(CGF);
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGF.getTypeSize(IdentQTy));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
  LValue PSource =
      CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));

  // Location strings are cached per raw source-location encoding.
  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      OS2 << FD->getQualifiedNameAsString();
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}
1718 
// Return the kmp_int32 global thread id for the current function. The value
// is cached in OpenMPLocThreadIDMap so it is computed at most once per
// function; inside outlined regions it is preferably loaded from the thread
// id argument, otherwise __kmpc_global_thread_num is called.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Only load from the argument when either exceptions cannot interfere
      // or the load happens in a block (entry or current) where the value's
      // address is known to be usable.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the runtime call at the service insert point so the cached value
  // is available throughout the function.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1775 
1776 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1777   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1778   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1779     clearLocThreadIdInsertPt(CGF);
1780     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1781   }
1782   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1783     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1784       UDRMap.erase(D);
1785     FunctionUDRMap.erase(CGF.CurFn);
1786   }
1787   auto I = FunctionUDMMap.find(CGF.CurFn);
1788   if (I != FunctionUDMMap.end()) {
1789     for(const auto *D : I->second)
1790       UDMMap.erase(D);
1791     FunctionUDMMap.erase(I);
1792   }
1793   LastprivateConditionalToTypes.erase(CGF.CurFn);
1794 }
1795 
1796 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1797   return IdentTy->getPointerTo();
1798 }
1799 
1800 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1801   if (!Kmpc_MicroTy) {
1802     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1803     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1804                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1805     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1806   }
1807   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1808 }
1809 
1810 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1811   llvm::FunctionCallee RTLFn = nullptr;
1812   switch (static_cast<OpenMPRTLFunction>(Function)) {
1813   case OMPRTL__kmpc_fork_call: {
1814     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1815     // microtask, ...);
1816     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1817                                 getKmpc_MicroPointerTy()};
1818     auto *FnTy =
1819         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1820     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1821     if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
1822       if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
1823         llvm::LLVMContext &Ctx = F->getContext();
1824         llvm::MDBuilder MDB(Ctx);
1825         // Annotate the callback behavior of the __kmpc_fork_call:
1826         //  - The callback callee is argument number 2 (microtask).
1827         //  - The first two arguments of the callback callee are unknown (-1).
1828         //  - All variadic arguments to the __kmpc_fork_call are passed to the
1829         //    callback callee.
1830         F->addMetadata(
1831             llvm::LLVMContext::MD_callback,
1832             *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
1833                                         2, {-1, -1},
1834                                         /* VarArgsArePassed */ true)}));
1835       }
1836     }
1837     break;
1838   }
1839   case OMPRTL__kmpc_global_thread_num: {
1840     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1841     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1842     auto *FnTy =
1843         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1844     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1845     break;
1846   }
1847   case OMPRTL__kmpc_threadprivate_cached: {
1848     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1849     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1850     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1851                                 CGM.VoidPtrTy, CGM.SizeTy,
1852                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1853     auto *FnTy =
1854         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1855     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1856     break;
1857   }
1858   case OMPRTL__kmpc_critical: {
1859     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1860     // kmp_critical_name *crit);
1861     llvm::Type *TypeParams[] = {
1862         getIdentTyPointerTy(), CGM.Int32Ty,
1863         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1864     auto *FnTy =
1865         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1866     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1867     break;
1868   }
1869   case OMPRTL__kmpc_critical_with_hint: {
1870     // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1871     // kmp_critical_name *crit, uintptr_t hint);
1872     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1873                                 llvm::PointerType::getUnqual(KmpCriticalNameTy),
1874                                 CGM.IntPtrTy};
1875     auto *FnTy =
1876         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1877     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1878     break;
1879   }
1880   case OMPRTL__kmpc_threadprivate_register: {
1881     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1882     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1883     // typedef void *(*kmpc_ctor)(void *);
1884     auto *KmpcCtorTy =
1885         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1886                                 /*isVarArg*/ false)->getPointerTo();
1887     // typedef void *(*kmpc_cctor)(void *, void *);
1888     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1889     auto *KmpcCopyCtorTy =
1890         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1891                                 /*isVarArg*/ false)
1892             ->getPointerTo();
1893     // typedef void (*kmpc_dtor)(void *);
1894     auto *KmpcDtorTy =
1895         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1896             ->getPointerTo();
1897     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1898                               KmpcCopyCtorTy, KmpcDtorTy};
1899     auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1900                                         /*isVarArg*/ false);
1901     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1902     break;
1903   }
1904   case OMPRTL__kmpc_end_critical: {
1905     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1906     // kmp_critical_name *crit);
1907     llvm::Type *TypeParams[] = {
1908         getIdentTyPointerTy(), CGM.Int32Ty,
1909         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1910     auto *FnTy =
1911         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1912     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1913     break;
1914   }
1915   case OMPRTL__kmpc_cancel_barrier: {
1916     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1917     // global_tid);
1918     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1919     auto *FnTy =
1920         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1921     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1922     break;
1923   }
1924   case OMPRTL__kmpc_barrier: {
1925     // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1926     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1927     auto *FnTy =
1928         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1929     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1930     break;
1931   }
1932   case OMPRTL__kmpc_for_static_fini: {
1933     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1934     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1935     auto *FnTy =
1936         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1937     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1938     break;
1939   }
1940   case OMPRTL__kmpc_push_num_threads: {
1941     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1942     // kmp_int32 num_threads)
1943     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1944                                 CGM.Int32Ty};
1945     auto *FnTy =
1946         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1947     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1948     break;
1949   }
1950   case OMPRTL__kmpc_serialized_parallel: {
1951     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1952     // global_tid);
1953     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1954     auto *FnTy =
1955         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1956     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1957     break;
1958   }
1959   case OMPRTL__kmpc_end_serialized_parallel: {
1960     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1961     // global_tid);
1962     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1963     auto *FnTy =
1964         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1965     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1966     break;
1967   }
1968   case OMPRTL__kmpc_flush: {
1969     // Build void __kmpc_flush(ident_t *loc);
1970     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1971     auto *FnTy =
1972         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1973     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1974     break;
1975   }
1976   case OMPRTL__kmpc_master: {
1977     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1978     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1979     auto *FnTy =
1980         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1981     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
1982     break;
1983   }
1984   case OMPRTL__kmpc_end_master: {
1985     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
1986     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1987     auto *FnTy =
1988         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1989     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
1990     break;
1991   }
1992   case OMPRTL__kmpc_omp_taskyield: {
1993     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
1994     // int end_part);
1995     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1996     auto *FnTy =
1997         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1998     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
1999     break;
2000   }
2001   case OMPRTL__kmpc_single: {
2002     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
2003     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2004     auto *FnTy =
2005         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2006     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
2007     break;
2008   }
2009   case OMPRTL__kmpc_end_single: {
2010     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
2011     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2012     auto *FnTy =
2013         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2014     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
2015     break;
2016   }
2017   case OMPRTL__kmpc_omp_task_alloc: {
2018     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
2019     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
2020     // kmp_routine_entry_t *task_entry);
2021     assert(KmpRoutineEntryPtrTy != nullptr &&
2022            "Type kmp_routine_entry_t must be created.");
2023     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2024                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
2025     // Return void * and then cast to particular kmp_task_t type.
2026     auto *FnTy =
2027         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2028     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
2029     break;
2030   }
2031   case OMPRTL__kmpc_omp_target_task_alloc: {
2032     // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32 gtid,
2033     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
2034     // kmp_routine_entry_t *task_entry, kmp_int64 device_id);
2035     assert(KmpRoutineEntryPtrTy != nullptr &&
2036            "Type kmp_routine_entry_t must be created.");
2037     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2038                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy,
2039                                 CGM.Int64Ty};
2040     // Return void * and then cast to particular kmp_task_t type.
2041     auto *FnTy =
2042         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2043     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_target_task_alloc");
2044     break;
2045   }
2046   case OMPRTL__kmpc_omp_task: {
2047     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2048     // *new_task);
2049     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2050                                 CGM.VoidPtrTy};
2051     auto *FnTy =
2052         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2053     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
2054     break;
2055   }
2056   case OMPRTL__kmpc_copyprivate: {
2057     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
2058     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
2059     // kmp_int32 didit);
2060     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2061     auto *CpyFnTy =
2062         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
2063     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
2064                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
2065                                 CGM.Int32Ty};
2066     auto *FnTy =
2067         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2068     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
2069     break;
2070   }
2071   case OMPRTL__kmpc_reduce: {
2072     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
2073     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
2074     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
2075     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2076     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
2077                                                /*isVarArg=*/false);
2078     llvm::Type *TypeParams[] = {
2079         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
2080         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
2081         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2082     auto *FnTy =
2083         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2084     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
2085     break;
2086   }
2087   case OMPRTL__kmpc_reduce_nowait: {
2088     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
2089     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
2090     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
2091     // *lck);
2092     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2093     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
2094                                                /*isVarArg=*/false);
2095     llvm::Type *TypeParams[] = {
2096         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
2097         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
2098         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2099     auto *FnTy =
2100         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2101     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
2102     break;
2103   }
2104   case OMPRTL__kmpc_end_reduce: {
2105     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
2106     // kmp_critical_name *lck);
2107     llvm::Type *TypeParams[] = {
2108         getIdentTyPointerTy(), CGM.Int32Ty,
2109         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2110     auto *FnTy =
2111         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2112     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
2113     break;
2114   }
2115   case OMPRTL__kmpc_end_reduce_nowait: {
2116     // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
2117     // kmp_critical_name *lck);
2118     llvm::Type *TypeParams[] = {
2119         getIdentTyPointerTy(), CGM.Int32Ty,
2120         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2121     auto *FnTy =
2122         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2123     RTLFn =
2124         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
2125     break;
2126   }
2127   case OMPRTL__kmpc_omp_task_begin_if0: {
2128     // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2129     // *new_task);
2130     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2131                                 CGM.VoidPtrTy};
2132     auto *FnTy =
2133         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2134     RTLFn =
2135         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
2136     break;
2137   }
2138   case OMPRTL__kmpc_omp_task_complete_if0: {
2139     // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2140     // *new_task);
2141     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2142                                 CGM.VoidPtrTy};
2143     auto *FnTy =
2144         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2145     RTLFn = CGM.CreateRuntimeFunction(FnTy,
2146                                       /*Name=*/"__kmpc_omp_task_complete_if0");
2147     break;
2148   }
2149   case OMPRTL__kmpc_ordered: {
2150     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
2151     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2152     auto *FnTy =
2153         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2154     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
2155     break;
2156   }
2157   case OMPRTL__kmpc_end_ordered: {
2158     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
2159     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2160     auto *FnTy =
2161         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2162     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
2163     break;
2164   }
2165   case OMPRTL__kmpc_omp_taskwait: {
2166     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
2167     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2168     auto *FnTy =
2169         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2170     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
2171     break;
2172   }
2173   case OMPRTL__kmpc_taskgroup: {
2174     // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
2175     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2176     auto *FnTy =
2177         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2178     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
2179     break;
2180   }
2181   case OMPRTL__kmpc_end_taskgroup: {
2182     // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
2183     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2184     auto *FnTy =
2185         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2186     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
2187     break;
2188   }
2189   case OMPRTL__kmpc_push_proc_bind: {
2190     // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
2191     // int proc_bind)
2192     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2193     auto *FnTy =
2194         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2195     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
2196     break;
2197   }
2198   case OMPRTL__kmpc_omp_task_with_deps: {
2199     // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
2200     // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
2201     // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
2202     llvm::Type *TypeParams[] = {
2203         getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
2204         CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
2205     auto *FnTy =
2206         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2207     RTLFn =
2208         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
2209     break;
2210   }
2211   case OMPRTL__kmpc_omp_wait_deps: {
2212     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
2213     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
2214     // kmp_depend_info_t *noalias_dep_list);
2215     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2216                                 CGM.Int32Ty,           CGM.VoidPtrTy,
2217                                 CGM.Int32Ty,           CGM.VoidPtrTy};
2218     auto *FnTy =
2219         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2220     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
2221     break;
2222   }
2223   case OMPRTL__kmpc_cancellationpoint: {
2224     // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
2225     // global_tid, kmp_int32 cncl_kind)
2226     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2227     auto *FnTy =
2228         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2229     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
2230     break;
2231   }
2232   case OMPRTL__kmpc_cancel: {
2233     // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
2234     // kmp_int32 cncl_kind)
2235     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2236     auto *FnTy =
2237         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2238     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
2239     break;
2240   }
2241   case OMPRTL__kmpc_push_num_teams: {
2242     // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid,
2243     // kmp_int32 num_teams, kmp_int32 num_threads)
2244     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2245         CGM.Int32Ty};
2246     auto *FnTy =
2247         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2248     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
2249     break;
2250   }
2251   case OMPRTL__kmpc_fork_teams: {
2252     // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
2253     // microtask, ...);
2254     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2255                                 getKmpc_MicroPointerTy()};
2256     auto *FnTy =
2257         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
2258     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
2259     if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
2260       if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
2261         llvm::LLVMContext &Ctx = F->getContext();
2262         llvm::MDBuilder MDB(Ctx);
2263         // Annotate the callback behavior of the __kmpc_fork_teams:
2264         //  - The callback callee is argument number 2 (microtask).
2265         //  - The first two arguments of the callback callee are unknown (-1).
2266         //  - All variadic arguments to the __kmpc_fork_teams are passed to the
2267         //    callback callee.
2268         F->addMetadata(
2269             llvm::LLVMContext::MD_callback,
2270             *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
2271                                         2, {-1, -1},
2272                                         /* VarArgsArePassed */ true)}));
2273       }
2274     }
2275     break;
2276   }
2277   case OMPRTL__kmpc_taskloop: {
2278     // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
2279     // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
2280     // sched, kmp_uint64 grainsize, void *task_dup);
2281     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2282                                 CGM.IntTy,
2283                                 CGM.VoidPtrTy,
2284                                 CGM.IntTy,
2285                                 CGM.Int64Ty->getPointerTo(),
2286                                 CGM.Int64Ty->getPointerTo(),
2287                                 CGM.Int64Ty,
2288                                 CGM.IntTy,
2289                                 CGM.IntTy,
2290                                 CGM.Int64Ty,
2291                                 CGM.VoidPtrTy};
2292     auto *FnTy =
2293         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2294     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
2295     break;
2296   }
2297   case OMPRTL__kmpc_doacross_init: {
2298     // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
2299     // num_dims, struct kmp_dim *dims);
2300     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2301                                 CGM.Int32Ty,
2302                                 CGM.Int32Ty,
2303                                 CGM.VoidPtrTy};
2304     auto *FnTy =
2305         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2306     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
2307     break;
2308   }
2309   case OMPRTL__kmpc_doacross_fini: {
2310     // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
2311     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2312     auto *FnTy =
2313         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2314     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
2315     break;
2316   }
2317   case OMPRTL__kmpc_doacross_post: {
2318     // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
2319     // *vec);
2320     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2321                                 CGM.Int64Ty->getPointerTo()};
2322     auto *FnTy =
2323         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2324     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
2325     break;
2326   }
2327   case OMPRTL__kmpc_doacross_wait: {
2328     // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
2329     // *vec);
2330     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2331                                 CGM.Int64Ty->getPointerTo()};
2332     auto *FnTy =
2333         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2334     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
2335     break;
2336   }
2337   case OMPRTL__kmpc_taskred_init: {
2338     // Build void *__kmpc_taskred_init(int gtid, int num_data, void *data);
2339     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
2340     auto *FnTy =
2341         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2342     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskred_init");
2343     break;
2344   }
2345   case OMPRTL__kmpc_task_reduction_get_th_data: {
2346     // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
2347     // *d);
2348     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2349     auto *FnTy =
2350         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2351     RTLFn = CGM.CreateRuntimeFunction(
2352         FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
2353     break;
2354   }
2355   case OMPRTL__kmpc_taskred_modifier_init: {
2356     // Build void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
2357     // is_ws, int num_data, void *data);
2358     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.IntTy, CGM.IntTy,
2359                                 CGM.IntTy, CGM.VoidPtrTy};
2360     auto *FnTy =
2361         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2362     RTLFn = CGM.CreateRuntimeFunction(FnTy,
2363                                       /*Name=*/"__kmpc_taskred_modifier_init");
2364     break;
2365   }
2366   case OMPRTL__kmpc_task_reduction_modifier_fini: {
2367     // Build void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid,
2368     // int is_ws);
2369     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.IntTy, CGM.IntTy};
2370     auto *FnTy =
2371         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2372     RTLFn = CGM.CreateRuntimeFunction(
2373         FnTy,
2374         /*Name=*/"__kmpc_task_reduction_modifier_fini");
2375     break;
2376   }
2377   case OMPRTL__kmpc_alloc: {
2378     // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t
2379     // al); omp_allocator_handle_t type is void *.
2380     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy};
2381     auto *FnTy =
2382         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2383     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc");
2384     break;
2385   }
2386   case OMPRTL__kmpc_free: {
2387     // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t
2388     // al); omp_allocator_handle_t type is void *.
2389     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2390     auto *FnTy =
2391         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2392     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free");
2393     break;
2394   }
2395   case OMPRTL__kmpc_push_target_tripcount: {
2396     // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
2397     // size);
2398     llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty};
2399     llvm::FunctionType *FnTy =
2400         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2401     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount");
2402     break;
2403   }
2404   case OMPRTL__tgt_target: {
2405     // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
2406     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2407     // *arg_types);
2408     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2409                                 CGM.VoidPtrTy,
2410                                 CGM.Int32Ty,
2411                                 CGM.VoidPtrPtrTy,
2412                                 CGM.VoidPtrPtrTy,
2413                                 CGM.Int64Ty->getPointerTo(),
2414                                 CGM.Int64Ty->getPointerTo()};
2415     auto *FnTy =
2416         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2417     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
2418     break;
2419   }
2420   case OMPRTL__tgt_target_nowait: {
2421     // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
2422     // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2423     // int64_t *arg_types);
2424     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2425                                 CGM.VoidPtrTy,
2426                                 CGM.Int32Ty,
2427                                 CGM.VoidPtrPtrTy,
2428                                 CGM.VoidPtrPtrTy,
2429                                 CGM.Int64Ty->getPointerTo(),
2430                                 CGM.Int64Ty->getPointerTo()};
2431     auto *FnTy =
2432         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2433     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
2434     break;
2435   }
2436   case OMPRTL__tgt_target_teams: {
2437     // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
2438     // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2439     // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2440     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2441                                 CGM.VoidPtrTy,
2442                                 CGM.Int32Ty,
2443                                 CGM.VoidPtrPtrTy,
2444                                 CGM.VoidPtrPtrTy,
2445                                 CGM.Int64Ty->getPointerTo(),
2446                                 CGM.Int64Ty->getPointerTo(),
2447                                 CGM.Int32Ty,
2448                                 CGM.Int32Ty};
2449     auto *FnTy =
2450         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2451     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
2452     break;
2453   }
2454   case OMPRTL__tgt_target_teams_nowait: {
2455     // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
2456     // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
2457     // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2458     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2459                                 CGM.VoidPtrTy,
2460                                 CGM.Int32Ty,
2461                                 CGM.VoidPtrPtrTy,
2462                                 CGM.VoidPtrPtrTy,
2463                                 CGM.Int64Ty->getPointerTo(),
2464                                 CGM.Int64Ty->getPointerTo(),
2465                                 CGM.Int32Ty,
2466                                 CGM.Int32Ty};
2467     auto *FnTy =
2468         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2469     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
2470     break;
2471   }
2472   case OMPRTL__tgt_register_requires: {
2473     // Build void __tgt_register_requires(int64_t flags);
2474     llvm::Type *TypeParams[] = {CGM.Int64Ty};
2475     auto *FnTy =
2476         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2477     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires");
2478     break;
2479   }
2480   case OMPRTL__tgt_target_data_begin: {
2481     // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
2482     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2483     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2484                                 CGM.Int32Ty,
2485                                 CGM.VoidPtrPtrTy,
2486                                 CGM.VoidPtrPtrTy,
2487                                 CGM.Int64Ty->getPointerTo(),
2488                                 CGM.Int64Ty->getPointerTo()};
2489     auto *FnTy =
2490         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2491     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
2492     break;
2493   }
2494   case OMPRTL__tgt_target_data_begin_nowait: {
2495     // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
2496     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2497     // *arg_types);
2498     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2499                                 CGM.Int32Ty,
2500                                 CGM.VoidPtrPtrTy,
2501                                 CGM.VoidPtrPtrTy,
2502                                 CGM.Int64Ty->getPointerTo(),
2503                                 CGM.Int64Ty->getPointerTo()};
2504     auto *FnTy =
2505         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2506     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
2507     break;
2508   }
2509   case OMPRTL__tgt_target_data_end: {
2510     // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
2511     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2512     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2513                                 CGM.Int32Ty,
2514                                 CGM.VoidPtrPtrTy,
2515                                 CGM.VoidPtrPtrTy,
2516                                 CGM.Int64Ty->getPointerTo(),
2517                                 CGM.Int64Ty->getPointerTo()};
2518     auto *FnTy =
2519         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2520     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
2521     break;
2522   }
2523   case OMPRTL__tgt_target_data_end_nowait: {
2524     // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
2525     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2526     // *arg_types);
2527     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2528                                 CGM.Int32Ty,
2529                                 CGM.VoidPtrPtrTy,
2530                                 CGM.VoidPtrPtrTy,
2531                                 CGM.Int64Ty->getPointerTo(),
2532                                 CGM.Int64Ty->getPointerTo()};
2533     auto *FnTy =
2534         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2535     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
2536     break;
2537   }
2538   case OMPRTL__tgt_target_data_update: {
2539     // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
2540     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2541     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2542                                 CGM.Int32Ty,
2543                                 CGM.VoidPtrPtrTy,
2544                                 CGM.VoidPtrPtrTy,
2545                                 CGM.Int64Ty->getPointerTo(),
2546                                 CGM.Int64Ty->getPointerTo()};
2547     auto *FnTy =
2548         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2549     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
2550     break;
2551   }
2552   case OMPRTL__tgt_target_data_update_nowait: {
2553     // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
2554     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2555     // *arg_types);
2556     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2557                                 CGM.Int32Ty,
2558                                 CGM.VoidPtrPtrTy,
2559                                 CGM.VoidPtrPtrTy,
2560                                 CGM.Int64Ty->getPointerTo(),
2561                                 CGM.Int64Ty->getPointerTo()};
2562     auto *FnTy =
2563         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2564     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
2565     break;
2566   }
2567   case OMPRTL__tgt_mapper_num_components: {
2568     // Build int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
2569     llvm::Type *TypeParams[] = {CGM.VoidPtrTy};
2570     auto *FnTy =
2571         llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false);
2572     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_mapper_num_components");
2573     break;
2574   }
2575   case OMPRTL__tgt_push_mapper_component: {
2576     // Build void __tgt_push_mapper_component(void *rt_mapper_handle, void
2577     // *base, void *begin, int64_t size, int64_t type);
2578     llvm::Type *TypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.VoidPtrTy,
2579                                 CGM.Int64Ty, CGM.Int64Ty};
2580     auto *FnTy =
2581         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2582     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_push_mapper_component");
2583     break;
2584   }
2585   case OMPRTL__kmpc_task_allow_completion_event: {
2586     // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
2587     // int gtid, kmp_task_t *task);
2588     auto *FnTy = llvm::FunctionType::get(
2589         CGM.VoidPtrTy, {getIdentTyPointerTy(), CGM.IntTy, CGM.VoidPtrTy},
2590         /*isVarArg=*/false);
2591     RTLFn =
2592         CGM.CreateRuntimeFunction(FnTy, "__kmpc_task_allow_completion_event");
2593     break;
2594   }
2595   }
2596   assert(RTLFn && "Unable to find OpenMP runtime function");
2597   return RTLFn;
2598 }
2599 
2600 llvm::FunctionCallee
2601 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
2602   assert((IVSize == 32 || IVSize == 64) &&
2603          "IV size is not compatible with the omp runtime");
2604   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
2605                                             : "__kmpc_for_static_init_4u")
2606                                 : (IVSigned ? "__kmpc_for_static_init_8"
2607                                             : "__kmpc_for_static_init_8u");
2608   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2609   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2610   llvm::Type *TypeParams[] = {
2611     getIdentTyPointerTy(),                     // loc
2612     CGM.Int32Ty,                               // tid
2613     CGM.Int32Ty,                               // schedtype
2614     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2615     PtrTy,                                     // p_lower
2616     PtrTy,                                     // p_upper
2617     PtrTy,                                     // p_stride
2618     ITy,                                       // incr
2619     ITy                                        // chunk
2620   };
2621   auto *FnTy =
2622       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2623   return CGM.CreateRuntimeFunction(FnTy, Name);
2624 }
2625 
2626 llvm::FunctionCallee
2627 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
2628   assert((IVSize == 32 || IVSize == 64) &&
2629          "IV size is not compatible with the omp runtime");
2630   StringRef Name =
2631       IVSize == 32
2632           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
2633           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
2634   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2635   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
2636                                CGM.Int32Ty,           // tid
2637                                CGM.Int32Ty,           // schedtype
2638                                ITy,                   // lower
2639                                ITy,                   // upper
2640                                ITy,                   // stride
2641                                ITy                    // chunk
2642   };
2643   auto *FnTy =
2644       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2645   return CGM.CreateRuntimeFunction(FnTy, Name);
2646 }
2647 
2648 llvm::FunctionCallee
2649 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
2650   assert((IVSize == 32 || IVSize == 64) &&
2651          "IV size is not compatible with the omp runtime");
2652   StringRef Name =
2653       IVSize == 32
2654           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
2655           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
2656   llvm::Type *TypeParams[] = {
2657       getIdentTyPointerTy(), // loc
2658       CGM.Int32Ty,           // tid
2659   };
2660   auto *FnTy =
2661       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2662   return CGM.CreateRuntimeFunction(FnTy, Name);
2663 }
2664 
2665 llvm::FunctionCallee
2666 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
2667   assert((IVSize == 32 || IVSize == 64) &&
2668          "IV size is not compatible with the omp runtime");
2669   StringRef Name =
2670       IVSize == 32
2671           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
2672           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
2673   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2674   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2675   llvm::Type *TypeParams[] = {
2676     getIdentTyPointerTy(),                     // loc
2677     CGM.Int32Ty,                               // tid
2678     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2679     PtrTy,                                     // p_lower
2680     PtrTy,                                     // p_upper
2681     PtrTy                                      // p_stride
2682   };
2683   auto *FnTy =
2684       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2685   return CGM.CreateRuntimeFunction(FnTy, Name);
2686 }
2687 
/// Obtain information that uniquely identifies a target entry. This
/// consists of the file and device IDs as well as line number associated with
/// the relevant entry source location. The device/file IDs are the
/// filesystem (device, inode) pair of the presumed file of \p Loc.
static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
                                     unsigned &DeviceID, unsigned &FileID,
                                     unsigned &LineNum) {
  SourceManager &SM = C.getSourceManager();

  // The loc should be always valid and have a file ID (the user cannot use
  // #pragma directives in macros)

  assert(Loc.isValid() && "Source location is expected to be always valid.");

  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
  assert(PLoc.isValid() && "Source location is expected to be always valid.");

  // Stat the presumed file to obtain its unique ID. On failure a diagnostic
  // is reported but the (zero-initialized) ID is still used below.
  llvm::sys::fs::UniqueID ID;
  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
    SM.getDiagnostics().Report(diag::err_cannot_open_file)
        << PLoc.getFilename() << EC.message();

  DeviceID = ID.getDevice();
  FileID = ID.getFile();
  LineNum = PLoc.getLine();
}
2713 
// Returns the address of the reference pointer ("_decl_tgt_ref_ptr") used to
// access a declare-target 'link' variable, or a 'to' variable when unified
// shared memory is required; returns an invalid Address otherwise.
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  // In simd-only mode no device code is generated, so no indirection pointer
  // is needed.
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    // Build the pointer's name: "<mangled-var>[_<fileid>]_decl_tgt_ref_ptr".
    // The file ID is appended only for internal-linkage variables, to
    // disambiguate identically named symbols from different TUs.
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      // First use in this module: create the pointer global and register it
      // with the offloading machinery.
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // On the host, initialize the pointer with the address of the original
      // variable; on the device it is left for the runtime to fill in.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
2752 
2753 llvm::Constant *
2754 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
2755   assert(!CGM.getLangOpts().OpenMPUseTLS ||
2756          !CGM.getContext().getTargetInfo().isTLSSupported());
2757   // Lookup the entry, lazily creating it if necessary.
2758   std::string Suffix = getName({"cache", ""});
2759   return getOrCreateInternalVariable(
2760       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
2761 }
2762 
2763 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
2764                                                 const VarDecl *VD,
2765                                                 Address VDAddr,
2766                                                 SourceLocation Loc) {
2767   if (CGM.getLangOpts().OpenMPUseTLS &&
2768       CGM.getContext().getTargetInfo().isTLSSupported())
2769     return VDAddr;
2770 
2771   llvm::Type *VarTy = VDAddr.getElementType();
2772   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2773                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2774                                                        CGM.Int8PtrTy),
2775                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
2776                          getOrCreateThreadPrivateCache(VD)};
2777   return Address(CGF.EmitRuntimeCall(
2778       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2779                  VDAddr.getAlignment());
2780 }
2781 
2782 void CGOpenMPRuntime::emitThreadPrivateVarInit(
2783     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
2784     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
2785   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
2786   // library.
2787   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
2788   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
2789                       OMPLoc);
2790   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
2791   // to register constructor/destructor for variable.
2792   llvm::Value *Args[] = {
2793       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
2794       Ctor, CopyCtor, Dtor};
2795   CGF.EmitRuntimeCall(
2796       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
2797 }
2798 
// Emits (at most once per variable) the ctor/dtor pair for a threadprivate
// variable and registers them with the runtime. When called outside a
// function (CGF == nullptr), the registration is wrapped in a synthesized
// "__omp_threadprivate_init_" function that is returned to the caller;
// otherwise registration code is emitted into *CGF and nullptr is returned.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // With TLS-based threadprivate nothing needs to be registered.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Emit at most once per mangled name.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD. Shape:
      //   void *__kmpc_global_ctor_(void *Dst) { /* init *Dst */ return Dst; }
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      // Load the destination pointer argument and emit the initializer
      // directly into the pointed-to storage.
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Return the destination pointer from the ctor.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD. Shape:
      //   void __kmpc_global_dtor_(void *Arg) { /* destroy *Arg */ }
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Missing ctor/dtor slots are passed to the runtime as typed null
    // function pointers.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function: synthesize a standalone init function that
      // performs the registration and return it to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
2918 
// Emits the ctor/dtor offload entries for a declare-target variable that is
// mapped 'to' a device, and registers them with the offload-entries manager.
// Returns whether this TU is compiled for the device.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // Nothing to do when not compiling for offloading at all.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  // 'link' variables (and 'to' variables under unified shared memory) are
  // handled through a reference pointer instead; see
  // getAddrOfDeclareTargetVar.
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Emit entries at most once per mangled name.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Add to llvm.used so the ctor survives even with no IR references.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // On the host only a placeholder is emitted; it acts as the unique ID
      // pairing the host entry with the device-side ctor.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Add to llvm.used so the dtor survives even with no IR references.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host-side placeholder acting as the unique ID for the device dtor.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
3033 
// Returns the per-thread address of a compiler-generated ("artificial")
// threadprivate variable of type \p VarType identified by \p Name.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  // The backing global is named "<Name><sep>artificial<sep>".
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  // With TLS available, mark the global thread-local and return it directly.
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  // Otherwise go through the runtime with an artificial (invalid) source
  // location and a dedicated cache global:
  //   __kmpc_threadprivate_cached(&loc, tid, &var, size, &cache)
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  // Cast the returned i8* back to a pointer to the variable's IR type.
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}
3062 
3063 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
3064                                    const RegionCodeGenTy &ThenGen,
3065                                    const RegionCodeGenTy &ElseGen) {
3066   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
3067 
3068   // If the condition constant folds and can be elided, try to avoid emitting
3069   // the condition and the dead arm of the if/else.
3070   bool CondConstant;
3071   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
3072     if (CondConstant)
3073       ThenGen(CGF);
3074     else
3075       ElseGen(CGF);
3076     return;
3077   }
3078 
3079   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
3080   // emit the conditional branch.
3081   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
3082   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
3083   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
3084   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
3085 
3086   // Emit the 'then' code.
3087   CGF.EmitBlock(ThenBlock);
3088   ThenGen(CGF);
3089   CGF.EmitBranch(ContBlock);
3090   // Emit the 'else' code if present.
3091   // There is no need to emit line number for unconditional branch.
3092   (void)ApplyDebugLocation::CreateEmpty(CGF);
3093   CGF.EmitBlock(ElseBlock);
3094   ElseGen(CGF);
3095   // There is no need to emit line number for unconditional branch.
3096   (void)ApplyDebugLocation::CreateEmpty(CGF);
3097   CGF.EmitBranch(ContBlock);
3098   // Emit the continuation block for code after the if.
3099   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
3100 }
3101 
// Emits the call sequence for a 'parallel' region: either a __kmpc_fork_call
// of the outlined function, or (when the if-clause is false) a serialized
// execution on the current thread.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  // Parallel path: hand the outlined function and the captured variables to
  // the runtime's fork call.
  auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // Serial path: call the outlined function directly on this thread between
  // serialized-parallel begin/end runtime calls.
  auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
                                                          PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
        EndArgs);
  };
  // With an if-clause, select between the two paths (constant conditions are
  // folded by emitIfClause); otherwise always fork.
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
3161 
3162 // If we're inside an (outlined) parallel region, use the region info's
3163 // thread-ID variable (it is passed in a first argument of the outlined function
3164 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
3165 // regular serial code region, get thread ID by calling kmp_int32
3166 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
3167 // return the address of that temp.
3168 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
3169                                              SourceLocation Loc) {
3170   if (auto *OMPRegionInfo =
3171           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3172     if (OMPRegionInfo->getThreadIDVariable())
3173       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
3174 
3175   llvm::Value *ThreadID = getThreadID(CGF, Loc);
3176   QualType Int32Ty =
3177       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
3178   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
3179   CGF.EmitStoreOfScalar(ThreadID,
3180                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
3181 
3182   return ThreadIDTemp;
3183 }
3184 
3185 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
3186     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
3187   SmallString<256> Buffer;
3188   llvm::raw_svector_ostream Out(Buffer);
3189   Out << Name;
3190   StringRef RuntimeName = Out.str();
3191   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
3192   if (Elem.second) {
3193     assert(Elem.second->getType()->getPointerElementType() == Ty &&
3194            "OMP internal variable has different type than requested");
3195     return &*Elem.second;
3196   }
3197 
3198   return Elem.second = new llvm::GlobalVariable(
3199              CGM.getModule(), Ty, /*IsConstant*/ false,
3200              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
3201              Elem.first(), /*InsertBefore=*/nullptr,
3202              llvm::GlobalValue::NotThreadLocal, AddressSpace);
3203 }
3204 
3205 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
3206   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
3207   std::string Name = getName({Prefix, "var"});
3208   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
3209 }
3210 
namespace {
/// Common pre(post)-action for different OpenMP constructs: emits an "enter"
/// runtime call before the region body and an "exit" call after it. When
/// \p Conditional is set, the body and the exit call are guarded by the
/// enter call returning non-zero.
///
/// NOTE(review): the argument lists are held as ArrayRefs, so the caller's
/// argument storage must outlive this object.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  // Emit the enter call; if conditional, open an omp_if.then block guarded
  // by the call's non-zero result.
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  // Close the region opened in Enter(). Only meaningful for conditional
  // actions, since ContBlock is only created on the conditional path.
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  // Emit the exit call after the region body.
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace
3249 
3250 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
3251                                          StringRef CriticalName,
3252                                          const RegionCodeGenTy &CriticalOpGen,
3253                                          SourceLocation Loc, const Expr *Hint) {
3254   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
3255   // CriticalOpGen();
3256   // __kmpc_end_critical(ident_t *, gtid, Lock);
3257   // Prepare arguments and build a call to __kmpc_critical
3258   if (!CGF.HaveInsertPoint())
3259     return;
3260   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3261                          getCriticalRegionLock(CriticalName)};
3262   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
3263                                                 std::end(Args));
3264   if (Hint) {
3265     EnterArgs.push_back(CGF.Builder.CreateIntCast(
3266         CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
3267   }
3268   CommonActionTy Action(
3269       createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
3270                                  : OMPRTL__kmpc_critical),
3271       EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
3272   CriticalOpGen.setAction(Action);
3273   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
3274 }
3275 
3276 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
3277                                        const RegionCodeGenTy &MasterOpGen,
3278                                        SourceLocation Loc) {
3279   if (!CGF.HaveInsertPoint())
3280     return;
3281   // if(__kmpc_master(ident_t *, gtid)) {
3282   //   MasterOpGen();
3283   //   __kmpc_end_master(ident_t *, gtid);
3284   // }
3285   // Prepare arguments and build a call to __kmpc_master
3286   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3287   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
3288                         createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
3289                         /*Conditional=*/true);
3290   MasterOpGen.setAction(Action);
3291   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
3292   Action.Done(CGF);
3293 }
3294 
3295 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
3296                                         SourceLocation Loc) {
3297   if (!CGF.HaveInsertPoint())
3298     return;
3299   llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
3300   if (OMPBuilder) {
3301     OMPBuilder->CreateTaskyield(CGF.Builder);
3302   } else {
3303     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
3304     llvm::Value *Args[] = {
3305         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3306         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
3307     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield),
3308                         Args);
3309   }
3310 
3311   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3312     Region->emitUntiedSwitch(CGF);
3313 }
3314 
3315 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
3316                                           const RegionCodeGenTy &TaskgroupOpGen,
3317                                           SourceLocation Loc) {
3318   if (!CGF.HaveInsertPoint())
3319     return;
3320   // __kmpc_taskgroup(ident_t *, gtid);
3321   // TaskgroupOpGen();
3322   // __kmpc_end_taskgroup(ident_t *, gtid);
3323   // Prepare arguments and build a call to __kmpc_taskgroup
3324   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3325   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
3326                         createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
3327                         Args);
3328   TaskgroupOpGen.setAction(Action);
3329   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
3330 }
3331 
3332 /// Given an array of pointers to variables, project the address of a
3333 /// given variable.
3334 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
3335                                       unsigned Index, const VarDecl *Var) {
3336   // Pull out the pointer to the variable.
3337   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
3338   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
3339 
3340   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
3341   Addr = CGF.Builder.CreateElementBitCast(
3342       Addr, CGF.ConvertTypeForMem(Var->getType()));
3343   return Addr;
3344 }
3345 
3346 static llvm::Value *emitCopyprivateCopyFunction(
3347     CodeGenModule &CGM, llvm::Type *ArgsType,
3348     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
3349     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
3350     SourceLocation Loc) {
3351   ASTContext &C = CGM.getContext();
3352   // void copy_func(void *LHSArg, void *RHSArg);
3353   FunctionArgList Args;
3354   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
3355                            ImplicitParamDecl::Other);
3356   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
3357                            ImplicitParamDecl::Other);
3358   Args.push_back(&LHSArg);
3359   Args.push_back(&RHSArg);
3360   const auto &CGFI =
3361       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3362   std::string Name =
3363       CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
3364   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
3365                                     llvm::GlobalValue::InternalLinkage, Name,
3366                                     &CGM.getModule());
3367   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
3368   Fn->setDoesNotRecurse();
3369   CodeGenFunction CGF(CGM);
3370   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
3371   // Dest = (void*[n])(LHSArg);
3372   // Src = (void*[n])(RHSArg);
3373   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3374       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
3375       ArgsType), CGF.getPointerAlign());
3376   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3377       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
3378       ArgsType), CGF.getPointerAlign());
3379   // *(Type0*)Dst[0] = *(Type0*)Src[0];
3380   // *(Type1*)Dst[1] = *(Type1*)Src[1];
3381   // ...
3382   // *(Typen*)Dst[n] = *(Typen*)Src[n];
3383   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
3384     const auto *DestVar =
3385         cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
3386     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
3387 
3388     const auto *SrcVar =
3389         cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
3390     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
3391 
3392     const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
3393     QualType Type = VD->getType();
3394     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
3395   }
3396   CGF.FinishFunction();
3397   return Fn;
3398 }
3399 
// Emit a 'single' region: one thread runs SingleOpGen(); if copyprivate
// variables are present, their values are broadcast to the rest of the team
// via __kmpc_copyprivate after the region.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  // The four arrays run in parallel: one src/dst/assignment-op triple per
  // copyprivate variable.
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // Overall shape of the emitted code:
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // 'did_it' is only needed when there are copyprivate clauses: it tells
  // __kmpc_copyprivate which thread actually executed the single region.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // Conditional action: the region body runs only when __kmpc_single returns
  // nonzero, and __kmpc_end_single is emitted on that same path.
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    // Fill the list with the address of each copyprivate variable, cast to
    // 'void *'.
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
  }
}
3481 
3482 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
3483                                         const RegionCodeGenTy &OrderedOpGen,
3484                                         SourceLocation Loc, bool IsThreads) {
3485   if (!CGF.HaveInsertPoint())
3486     return;
3487   // __kmpc_ordered(ident_t *, gtid);
3488   // OrderedOpGen();
3489   // __kmpc_end_ordered(ident_t *, gtid);
3490   // Prepare arguments and build a call to __kmpc_ordered
3491   if (IsThreads) {
3492     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3493     CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
3494                           createRuntimeFunction(OMPRTL__kmpc_end_ordered),
3495                           Args);
3496     OrderedOpGen.setAction(Action);
3497     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3498     return;
3499   }
3500   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3501 }
3502 
3503 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
3504   unsigned Flags;
3505   if (Kind == OMPD_for)
3506     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
3507   else if (Kind == OMPD_sections)
3508     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
3509   else if (Kind == OMPD_single)
3510     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
3511   else if (Kind == OMPD_barrier)
3512     Flags = OMP_IDENT_BARRIER_EXPL;
3513   else
3514     Flags = OMP_IDENT_BARRIER_IMPL;
3515   return Flags;
3516 }
3517 
3518 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
3519     CodeGenFunction &CGF, const OMPLoopDirective &S,
3520     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
3521   // Check if the loop directive is actually a doacross loop directive. In this
3522   // case choose static, 1 schedule.
3523   if (llvm::any_of(
3524           S.getClausesOfKind<OMPOrderedClause>(),
3525           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
3526     ScheduleKind = OMPC_SCHEDULE_static;
3527     // Chunk size is 1 in this case.
3528     llvm::APInt ChunkSize(32, 1);
3529     ChunkExpr = IntegerLiteral::Create(
3530         CGF.getContext(), ChunkSize,
3531         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
3532         SourceLocation());
3533   }
3534 }
3535 
// Emit a barrier at the current insertion point. Inside a cancellable region
// (unless ForceSimpleCall) the cancellation variant __kmpc_cancel_barrier is
// used, optionally followed by a check that branches out of the construct
// when cancellation was observed.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
  if (OMPBuilder) {
    CGF.Builder.restoreIP(OMPBuilder->CreateBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  // The flag encodes which construct this barrier belongs to; it is carried
  // in the ident_t location argument.
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      // Cancellation variant: the result tells whether cancellation was
      // requested by some thread in the team.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  // Plain barrier: no cancellation check needed.
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
}
3582 
3583 /// Map the OpenMP loop schedule to the runtime enumeration.
3584 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
3585                                           bool Chunked, bool Ordered) {
3586   switch (ScheduleKind) {
3587   case OMPC_SCHEDULE_static:
3588     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
3589                    : (Ordered ? OMP_ord_static : OMP_sch_static);
3590   case OMPC_SCHEDULE_dynamic:
3591     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
3592   case OMPC_SCHEDULE_guided:
3593     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
3594   case OMPC_SCHEDULE_runtime:
3595     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
3596   case OMPC_SCHEDULE_auto:
3597     return Ordered ? OMP_ord_auto : OMP_sch_auto;
3598   case OMPC_SCHEDULE_unknown:
3599     assert(!Chunked && "chunk was specified but schedule kind not known");
3600     return Ordered ? OMP_ord_static : OMP_sch_static;
3601   }
3602   llvm_unreachable("Unexpected runtime schedule");
3603 }
3604 
3605 /// Map the OpenMP distribute schedule to the runtime enumeration.
3606 static OpenMPSchedType
3607 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
3608   // only static is allowed for dist_schedule
3609   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
3610 }
3611 
3612 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
3613                                          bool Chunked) const {
3614   OpenMPSchedType Schedule =
3615       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3616   return Schedule == OMP_sch_static;
3617 }
3618 
3619 bool CGOpenMPRuntime::isStaticNonchunked(
3620     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3621   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3622   return Schedule == OMP_dist_sch_static;
3623 }
3624 
3625 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
3626                                       bool Chunked) const {
3627   OpenMPSchedType Schedule =
3628       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3629   return Schedule == OMP_sch_static_chunked;
3630 }
3631 
3632 bool CGOpenMPRuntime::isStaticChunked(
3633     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3634   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3635   return Schedule == OMP_dist_sch_static_chunked;
3636 }
3637 
3638 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
3639   OpenMPSchedType Schedule =
3640       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
3641   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
3642   return Schedule != OMP_sch_static;
3643 }
3644 
3645 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
3646                                   OpenMPScheduleClauseModifier M1,
3647                                   OpenMPScheduleClauseModifier M2) {
3648   int Modifier = 0;
3649   switch (M1) {
3650   case OMPC_SCHEDULE_MODIFIER_monotonic:
3651     Modifier = OMP_sch_modifier_monotonic;
3652     break;
3653   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3654     Modifier = OMP_sch_modifier_nonmonotonic;
3655     break;
3656   case OMPC_SCHEDULE_MODIFIER_simd:
3657     if (Schedule == OMP_sch_static_chunked)
3658       Schedule = OMP_sch_static_balanced_chunked;
3659     break;
3660   case OMPC_SCHEDULE_MODIFIER_last:
3661   case OMPC_SCHEDULE_MODIFIER_unknown:
3662     break;
3663   }
3664   switch (M2) {
3665   case OMPC_SCHEDULE_MODIFIER_monotonic:
3666     Modifier = OMP_sch_modifier_monotonic;
3667     break;
3668   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3669     Modifier = OMP_sch_modifier_nonmonotonic;
3670     break;
3671   case OMPC_SCHEDULE_MODIFIER_simd:
3672     if (Schedule == OMP_sch_static_chunked)
3673       Schedule = OMP_sch_static_balanced_chunked;
3674     break;
3675   case OMPC_SCHEDULE_MODIFIER_last:
3676   case OMPC_SCHEDULE_MODIFIER_unknown:
3677     break;
3678   }
3679   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
3680   // If the static schedule kind is specified or if the ordered clause is
3681   // specified, and if the nonmonotonic modifier is not specified, the effect is
3682   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
3683   // modifier is specified, the effect is as if the nonmonotonic modifier is
3684   // specified.
3685   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
3686     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
3687           Schedule == OMP_sch_static_balanced_chunked ||
3688           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
3689           Schedule == OMP_dist_sch_static_chunked ||
3690           Schedule == OMP_dist_sch_static))
3691       Modifier = OMP_sch_modifier_nonmonotonic;
3692   }
3693   return Schedule | Modifier;
3694 }
3695 
3696 void CGOpenMPRuntime::emitForDispatchInit(
3697     CodeGenFunction &CGF, SourceLocation Loc,
3698     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
3699     bool Ordered, const DispatchRTInput &DispatchValues) {
3700   if (!CGF.HaveInsertPoint())
3701     return;
3702   OpenMPSchedType Schedule = getRuntimeSchedule(
3703       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
3704   assert(Ordered ||
3705          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
3706           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
3707           Schedule != OMP_sch_static_balanced_chunked));
3708   // Call __kmpc_dispatch_init(
3709   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
3710   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
3711   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
3712 
3713   // If the Chunk was not specified in the clause - use default value 1.
3714   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
3715                                             : CGF.Builder.getIntN(IVSize, 1);
3716   llvm::Value *Args[] = {
3717       emitUpdateLocation(CGF, Loc),
3718       getThreadID(CGF, Loc),
3719       CGF.Builder.getInt32(addMonoNonMonoModifier(
3720           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
3721       DispatchValues.LB,                                     // Lower
3722       DispatchValues.UB,                                     // Upper
3723       CGF.Builder.getIntN(IVSize, 1),                        // Stride
3724       Chunk                                                  // Chunk
3725   };
3726   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
3727 }
3728 
3729 static void emitForStaticInitCall(
3730     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
3731     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
3732     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
3733     const CGOpenMPRuntime::StaticRTInput &Values) {
3734   if (!CGF.HaveInsertPoint())
3735     return;
3736 
3737   assert(!Values.Ordered);
3738   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
3739          Schedule == OMP_sch_static_balanced_chunked ||
3740          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
3741          Schedule == OMP_dist_sch_static ||
3742          Schedule == OMP_dist_sch_static_chunked);
3743 
3744   // Call __kmpc_for_static_init(
3745   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
3746   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
3747   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
3748   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
3749   llvm::Value *Chunk = Values.Chunk;
3750   if (Chunk == nullptr) {
3751     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
3752             Schedule == OMP_dist_sch_static) &&
3753            "expected static non-chunked schedule");
3754     // If the Chunk was not specified in the clause - use default value 1.
3755     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
3756   } else {
3757     assert((Schedule == OMP_sch_static_chunked ||
3758             Schedule == OMP_sch_static_balanced_chunked ||
3759             Schedule == OMP_ord_static_chunked ||
3760             Schedule == OMP_dist_sch_static_chunked) &&
3761            "expected static chunked schedule");
3762   }
3763   llvm::Value *Args[] = {
3764       UpdateLocation,
3765       ThreadId,
3766       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
3767                                                   M2)), // Schedule type
3768       Values.IL.getPointer(),                           // &isLastIter
3769       Values.LB.getPointer(),                           // &LB
3770       Values.UB.getPointer(),                           // &UB
3771       Values.ST.getPointer(),                           // &Stride
3772       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
3773       Chunk                                             // Chunk
3774   };
3775   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
3776 }
3777 
3778 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
3779                                         SourceLocation Loc,
3780                                         OpenMPDirectiveKind DKind,
3781                                         const OpenMPScheduleTy &ScheduleKind,
3782                                         const StaticRTInput &Values) {
3783   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
3784       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
3785   assert(isOpenMPWorksharingDirective(DKind) &&
3786          "Expected loop-based or sections-based directive.");
3787   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
3788                                              isOpenMPLoopDirective(DKind)
3789                                                  ? OMP_IDENT_WORK_LOOP
3790                                                  : OMP_IDENT_WORK_SECTIONS);
3791   llvm::Value *ThreadId = getThreadID(CGF, Loc);
3792   llvm::FunctionCallee StaticInitFunction =
3793       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3794   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
3795   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3796                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
3797 }
3798 
3799 void CGOpenMPRuntime::emitDistributeStaticInit(
3800     CodeGenFunction &CGF, SourceLocation Loc,
3801     OpenMPDistScheduleClauseKind SchedKind,
3802     const CGOpenMPRuntime::StaticRTInput &Values) {
3803   OpenMPSchedType ScheduleNum =
3804       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
3805   llvm::Value *UpdatedLocation =
3806       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
3807   llvm::Value *ThreadId = getThreadID(CGF, Loc);
3808   llvm::FunctionCallee StaticInitFunction =
3809       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3810   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3811                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
3812                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
3813 }
3814 
3815 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
3816                                           SourceLocation Loc,
3817                                           OpenMPDirectiveKind DKind) {
3818   if (!CGF.HaveInsertPoint())
3819     return;
3820   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
3821   llvm::Value *Args[] = {
3822       emitUpdateLocation(CGF, Loc,
3823                          isOpenMPDistributeDirective(DKind)
3824                              ? OMP_IDENT_WORK_DISTRIBUTE
3825                              : isOpenMPLoopDirective(DKind)
3826                                    ? OMP_IDENT_WORK_LOOP
3827                                    : OMP_IDENT_WORK_SECTIONS),
3828       getThreadID(CGF, Loc)};
3829   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
3830   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
3831                       Args);
3832 }
3833 
3834 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
3835                                                  SourceLocation Loc,
3836                                                  unsigned IVSize,
3837                                                  bool IVSigned) {
3838   if (!CGF.HaveInsertPoint())
3839     return;
3840   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
3841   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3842   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
3843 }
3844 
3845 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
3846                                           SourceLocation Loc, unsigned IVSize,
3847                                           bool IVSigned, Address IL,
3848                                           Address LB, Address UB,
3849                                           Address ST) {
3850   // Call __kmpc_dispatch_next(
3851   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
3852   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
3853   //          kmp_int[32|64] *p_stride);
3854   llvm::Value *Args[] = {
3855       emitUpdateLocation(CGF, Loc),
3856       getThreadID(CGF, Loc),
3857       IL.getPointer(), // &isLastIter
3858       LB.getPointer(), // &Lower
3859       UB.getPointer(), // &Upper
3860       ST.getPointer()  // &Stride
3861   };
3862   llvm::Value *Call =
3863       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
3864   return CGF.EmitScalarConversion(
3865       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
3866       CGF.getContext().BoolTy, Loc);
3867 }
3868 
3869 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
3870                                            llvm::Value *NumThreads,
3871                                            SourceLocation Loc) {
3872   if (!CGF.HaveInsertPoint())
3873     return;
3874   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
3875   llvm::Value *Args[] = {
3876       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3877       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
3878   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
3879                       Args);
3880 }
3881 
3882 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
3883                                          ProcBindKind ProcBind,
3884                                          SourceLocation Loc) {
3885   if (!CGF.HaveInsertPoint())
3886     return;
3887   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
3888   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
3889   llvm::Value *Args[] = {
3890       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3891       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
3892   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
3893 }
3894 
3895 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
3896                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
3897   llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
3898   if (OMPBuilder) {
3899     OMPBuilder->CreateFlush(CGF.Builder);
3900   } else {
3901     if (!CGF.HaveInsertPoint())
3902       return;
3903     // Build call void __kmpc_flush(ident_t *loc)
3904     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
3905                         emitUpdateLocation(CGF, Loc));
3906   }
3907 }
3908 
namespace {
/// Indexes of fields for type kmp_task_t. The enumerator order corresponds
/// to the field order of the kmp_task_t record these indexes are used with.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
3934 
3935 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3936   return OffloadEntriesTargetRegion.empty() &&
3937          OffloadEntriesDeviceGlobalVar.empty();
3938 }
3939 
3940 /// Initialize target region entry.
3941 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3942     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3943                                     StringRef ParentName, unsigned LineNum,
3944                                     unsigned Order) {
3945   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3946                                              "only required for the device "
3947                                              "code generation.");
3948   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3949       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3950                                    OMPTargetRegionEntryTargetRegion);
3951   ++OffloadingEntriesNum;
3952 }
3953 
3954 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3955     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3956                                   StringRef ParentName, unsigned LineNum,
3957                                   llvm::Constant *Addr, llvm::Constant *ID,
3958                                   OMPTargetRegionEntryKind Flags) {
3959   // If we are emitting code for a target, the entry is already initialized,
3960   // only has to be registered.
3961   if (CGM.getLangOpts().OpenMPIsDevice) {
3962     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
3963       unsigned DiagID = CGM.getDiags().getCustomDiagID(
3964           DiagnosticsEngine::Error,
3965           "Unable to find target region on line '%0' in the device code.");
3966       CGM.getDiags().Report(DiagID) << LineNum;
3967       return;
3968     }
3969     auto &Entry =
3970         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3971     assert(Entry.isValid() && "Entry not initialized!");
3972     Entry.setAddress(Addr);
3973     Entry.setID(ID);
3974     Entry.setFlags(Flags);
3975   } else {
3976     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3977     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3978     ++OffloadingEntriesNum;
3979   }
3980 }
3981 
3982 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3983     unsigned DeviceID, unsigned FileID, StringRef ParentName,
3984     unsigned LineNum) const {
3985   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3986   if (PerDevice == OffloadEntriesTargetRegion.end())
3987     return false;
3988   auto PerFile = PerDevice->second.find(FileID);
3989   if (PerFile == PerDevice->second.end())
3990     return false;
3991   auto PerParentName = PerFile->second.find(ParentName);
3992   if (PerParentName == PerFile->second.end())
3993     return false;
3994   auto PerLine = PerParentName->second.find(LineNum);
3995   if (PerLine == PerParentName->second.end())
3996     return false;
3997   // Fail if this entry is already registered.
3998   if (PerLine->second.getAddress() || PerLine->second.getID())
3999     return false;
4000   return true;
4001 }
4002 
4003 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
4004     const OffloadTargetRegionEntryInfoActTy &Action) {
4005   // Scan all target region entries and perform the provided action.
4006   for (const auto &D : OffloadEntriesTargetRegion)
4007     for (const auto &F : D.second)
4008       for (const auto &P : F.second)
4009         for (const auto &L : P.second)
4010           Action(D.first, F.first, P.first(), L.first, L.second);
4011 }
4012 
4013 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
4014     initializeDeviceGlobalVarEntryInfo(StringRef Name,
4015                                        OMPTargetGlobalVarEntryKind Flags,
4016                                        unsigned Order) {
4017   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
4018                                              "only required for the device "
4019                                              "code generation.");
4020   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
4021   ++OffloadingEntriesNum;
4022 }
4023 
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // Device compilation: the entry must already have been initialized from
    // the host IR metadata. Note that operator[] default-constructs an
    // invalid placeholder for an unknown name; the assert below catches that
    // in asserts builds.
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    assert(Entry.isValid() && Entry.getFlags() == Flags &&
           "Entry not initialized!");
    assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
           "Resetting with the new address.");
    // Entry already fully registered: only fill in size/linkage if they are
    // still unset, then bail out.
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    // Host compilation: update an existing entry in place, or create a new
    // one with the next creation-order number.
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
             "Resetting with the new address.");
      // Only fill in size/linkage if they are still unset.
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}
4063 
4064 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
4065     actOnDeviceGlobalVarEntriesInfo(
4066         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
4067   // Scan all target region entries and perform the provided action.
4068   for (const auto &E : OffloadEntriesDeviceGlobalVar)
4069     Action(E.getKey(), E.getValue());
4070 }
4071 
4072 void CGOpenMPRuntime::createOffloadEntry(
4073     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
4074     llvm::GlobalValue::LinkageTypes Linkage) {
4075   StringRef Name = Addr->getName();
4076   llvm::Module &M = CGM.getModule();
4077   llvm::LLVMContext &C = M.getContext();
4078 
4079   // Create constant string with the name.
4080   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
4081 
4082   std::string StringName = getName({"omp_offloading", "entry_name"});
4083   auto *Str = new llvm::GlobalVariable(
4084       M, StrPtrInit->getType(), /*isConstant=*/true,
4085       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
4086   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
4087 
4088   llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
4089                             llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
4090                             llvm::ConstantInt::get(CGM.SizeTy, Size),
4091                             llvm::ConstantInt::get(CGM.Int32Ty, Flags),
4092                             llvm::ConstantInt::get(CGM.Int32Ty, 0)};
4093   std::string EntryName = getName({"omp_offloading", "entry", ""});
4094   llvm::GlobalVariable *Entry = createGlobalStruct(
4095       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
4096       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
4097 
4098   // The entry has to be created in the section the linker expects it to be.
4099   Entry->setSection("omp_offloading_entries");
4100 }
4101 
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for functions that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries are placed at the index given by their creation order so the host
  // and device sides agree on the numbering.
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Recover a source location for diagnostics by matching the
        // (device, file) unique ID against the files known to the source
        // manager.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Emit one __tgt_offload_entry per recorded entry, in creation order, and
  // diagnose entries that were initialized but never fully registered.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        // 'to' entries are skipped on the device under unified shared memory.
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        // 'link' entries carry an address on the host only.
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
4275 
/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  // Only the device compilation consumes host metadata.
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host IR into a throw-away context; only the named metadata is
  // read from it.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Helpers decoding the operands of one node; the operand layout matches
    // the emitters in createOffloadEntriesAndInfoMetadata().
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 is the entry kind; the rest are kind-specific.
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
4344 
4345 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
4346   if (!KmpRoutineEntryPtrTy) {
4347     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
4348     ASTContext &C = CGM.getContext();
4349     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
4350     FunctionProtoType::ExtProtoInfo EPI;
4351     KmpRoutineEntryPtrQTy = C.getPointerType(
4352         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
4353     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
4354   }
4355 }
4356 
4357 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
4358   // Make sure the type of the entry is already created. This is the type we
4359   // have to create:
4360   // struct __tgt_offload_entry{
4361   //   void      *addr;       // Pointer to the offload entry info.
4362   //                          // (function or global)
4363   //   char      *name;       // Name of the function or global.
4364   //   size_t     size;       // Size of the entry info (0 if it a function).
4365   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
4366   //   int32_t    reserved;   // Reserved, to use by the runtime library.
4367   // };
4368   if (TgtOffloadEntryQTy.isNull()) {
4369     ASTContext &C = CGM.getContext();
4370     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
4371     RD->startDefinition();
4372     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4373     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
4374     addFieldToRecordDecl(C, RD, C.getSizeType());
4375     addFieldToRecordDecl(
4376         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4377     addFieldToRecordDecl(
4378         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4379     RD->completeDefinition();
4380     RD->addAttr(PackedAttr::CreateImplicit(C));
4381     TgtOffloadEntryQTy = C.getRecordType(RD);
4382   }
4383   return TgtOffloadEntryQTy;
4384 }
4385 
4386 namespace {
4387 struct PrivateHelpersTy {
4388   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
4389                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
4390       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
4391         PrivateElemInit(PrivateElemInit) {}
4392   const Expr *OriginalRef = nullptr;
4393   const VarDecl *Original = nullptr;
4394   const VarDecl *PrivateCopy = nullptr;
4395   const VarDecl *PrivateElemInit = nullptr;
4396 };
4397 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
4398 } // anonymous namespace
4399 
4400 static RecordDecl *
4401 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
4402   if (!Privates.empty()) {
4403     ASTContext &C = CGM.getContext();
4404     // Build struct .kmp_privates_t. {
4405     //         /*  private vars  */
4406     //       };
4407     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
4408     RD->startDefinition();
4409     for (const auto &Pair : Privates) {
4410       const VarDecl *VD = Pair.second.Original;
4411       QualType Type = VD->getType().getNonReferenceType();
4412       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
4413       if (VD->hasAttrs()) {
4414         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
4415              E(VD->getAttrs().end());
4416              I != E; ++I)
4417           FD->addAttr(*I);
4418       }
4419     }
4420     RD->completeDefinition();
4421     return RD;
4422   }
4423   return nullptr;
4424 }
4425 
4426 static RecordDecl *
4427 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
4428                          QualType KmpInt32Ty,
4429                          QualType KmpRoutineEntryPointerQTy) {
4430   ASTContext &C = CGM.getContext();
4431   // Build struct kmp_task_t {
4432   //         void *              shareds;
4433   //         kmp_routine_entry_t routine;
4434   //         kmp_int32           part_id;
4435   //         kmp_cmplrdata_t data1;
4436   //         kmp_cmplrdata_t data2;
4437   // For taskloops additional fields:
4438   //         kmp_uint64          lb;
4439   //         kmp_uint64          ub;
4440   //         kmp_int64           st;
4441   //         kmp_int32           liter;
4442   //         void *              reductions;
4443   //       };
4444   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
4445   UD->startDefinition();
4446   addFieldToRecordDecl(C, UD, KmpInt32Ty);
4447   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
4448   UD->completeDefinition();
4449   QualType KmpCmplrdataTy = C.getRecordType(UD);
4450   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
4451   RD->startDefinition();
4452   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4453   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
4454   addFieldToRecordDecl(C, RD, KmpInt32Ty);
4455   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4456   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4457   if (isOpenMPTaskLoopDirective(Kind)) {
4458     QualType KmpUInt64Ty =
4459         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
4460     QualType KmpInt64Ty =
4461         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
4462     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4463     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4464     addFieldToRecordDecl(C, RD, KmpInt64Ty);
4465     addFieldToRecordDecl(C, RD, KmpInt32Ty);
4466     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4467   }
4468   RD->completeDefinition();
4469   return RD;
4470 }
4471 
4472 static RecordDecl *
4473 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
4474                                      ArrayRef<PrivateDataTy> Privates) {
4475   ASTContext &C = CGM.getContext();
4476   // Build struct kmp_task_t_with_privates {
4477   //         kmp_task_t task_data;
4478   //         .kmp_privates_t. privates;
4479   //       };
4480   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
4481   RD->startDefinition();
4482   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
4483   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
4484     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
4485   RD->completeDefinition();
4486   return RD;
4487 }
4488 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Arguments of the proxy: (kmp_int32 gtid, kmp_task_t_with_privates *tt).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // TDBase addresses the whole kmp_task_t_with_privates record; Base its
  // first field, the embedded kmp_task_t.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  // Load the shareds pointer and cast it to the type TaskFunction expects.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // Pass a pointer to the privates record if it exists, null otherwise.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  // Taskloop variants additionally receive lb/ub/st/liter/reductions loaded
  // from the task record.
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The proxy always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
4603 
/// Emit a function with the signature
/// kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt)
/// that runs the destructor of every destructible field of the privates
/// record embedded in the task structure.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  // Arguments: (kmp_int32 gtid, kmp_task_t_with_privates *tt).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  // Navigate to the privates record, i.e. the second field of
  // kmp_task_t_with_privates.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  // Schedule a destroy for every field that has a non-trivial destruction.
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
4652 
4653 /// Emit a privates mapping function for correct handling of private and
4654 /// firstprivate variables.
4655 /// \code
4656 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
4657 /// **noalias priv1,...,  <tyn> **noalias privn) {
4658 ///   *priv1 = &.privates.priv1;
4659 ///   ...;
4660 ///   *privn = &.privates.privn;
4661 /// }
4662 /// \endcode
4663 static llvm::Value *
4664 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
4665                                ArrayRef<const Expr *> PrivateVars,
4666                                ArrayRef<const Expr *> FirstprivateVars,
4667                                ArrayRef<const Expr *> LastprivateVars,
4668                                QualType PrivatesQTy,
4669                                ArrayRef<PrivateDataTy> Privates) {
4670   ASTContext &C = CGM.getContext();
4671   FunctionArgList Args;
4672   ImplicitParamDecl TaskPrivatesArg(
4673       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4674       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
4675       ImplicitParamDecl::Other);
4676   Args.push_back(&TaskPrivatesArg);
4677   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
4678   unsigned Counter = 1;
4679   for (const Expr *E : PrivateVars) {
4680     Args.push_back(ImplicitParamDecl::Create(
4681         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4682         C.getPointerType(C.getPointerType(E->getType()))
4683             .withConst()
4684             .withRestrict(),
4685         ImplicitParamDecl::Other));
4686     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4687     PrivateVarsPos[VD] = Counter;
4688     ++Counter;
4689   }
4690   for (const Expr *E : FirstprivateVars) {
4691     Args.push_back(ImplicitParamDecl::Create(
4692         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4693         C.getPointerType(C.getPointerType(E->getType()))
4694             .withConst()
4695             .withRestrict(),
4696         ImplicitParamDecl::Other));
4697     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4698     PrivateVarsPos[VD] = Counter;
4699     ++Counter;
4700   }
4701   for (const Expr *E : LastprivateVars) {
4702     Args.push_back(ImplicitParamDecl::Create(
4703         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4704         C.getPointerType(C.getPointerType(E->getType()))
4705             .withConst()
4706             .withRestrict(),
4707         ImplicitParamDecl::Other));
4708     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4709     PrivateVarsPos[VD] = Counter;
4710     ++Counter;
4711   }
4712   const auto &TaskPrivatesMapFnInfo =
4713       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4714   llvm::FunctionType *TaskPrivatesMapTy =
4715       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
4716   std::string Name =
4717       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
4718   auto *TaskPrivatesMap = llvm::Function::Create(
4719       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
4720       &CGM.getModule());
4721   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
4722                                     TaskPrivatesMapFnInfo);
4723   if (CGM.getLangOpts().Optimize) {
4724     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
4725     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
4726     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
4727   }
4728   CodeGenFunction CGF(CGM);
4729   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
4730                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
4731 
4732   // *privi = &.privates.privi;
4733   LValue Base = CGF.EmitLoadOfPointerLValue(
4734       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
4735       TaskPrivatesArg.getType()->castAs<PointerType>());
4736   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
4737   Counter = 0;
4738   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
4739     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
4740     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
4741     LValue RefLVal =
4742         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
4743     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
4744         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
4745     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
4746     ++Counter;
4747   }
4748   CGF.FinishFunction();
4749   return TaskPrivatesMap;
4750 }
4751 
/// Emit initialization for private variables in task-based directives.
///
/// \param KmpTaskSharedsPtr Address of the task's 'shareds' block; may be
///        invalid when nothing needs to be copied from shareds.
/// \param TDBase Base lvalue of the kmp_task_t_with_privates instance.
/// \param KmpTaskTWithPrivatesQTyRD Record whose second field is the
///        .privates. record this function fills in.
/// \param ForDup true when emitting inside the task duplication function
///        (taskloop), false for initial task allocation.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // Second field of kmp_task_t_with_privates is the .privates. record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    // Reinterpret the shareds block as the shareds record type so fields can
    // be addressed below.
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Walk the fields of the .privates. record in lockstep with Privates (the
  // record was built from the same, alignment-sorted list).
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the dup function only non-trivial constructor inits must be re-run;
    // everything else was already handled at allocation time.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // A non-null PrivateElemInit marks a firstprivate: copy/construct from
      // the shared original rather than default-initializing.
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Source lives in the shareds block of the task being duplicated.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else {
          // Emit the original reference inside a dummy inlined region so the
          // capture machinery resolves it correctly.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false);
          SharedRefLValue =  CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Scalar/class firstprivate: privatize the init helper to point at
          // the shared original, then run the initializer.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private/lastprivate: run the declared initializer (if any).
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
4863 
4864 /// Check if duplication function is required for taskloops.
4865 static bool checkInitIsRequired(CodeGenFunction &CGF,
4866                                 ArrayRef<PrivateDataTy> Privates) {
4867   bool InitRequired = false;
4868   for (const PrivateDataTy &Pair : Privates) {
4869     const VarDecl *VD = Pair.second.PrivateCopy;
4870     const Expr *Init = VD->getAnyInitializer();
4871     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
4872                                     !CGF.isTrivialInitializer(Init));
4873     if (InitRequired)
4874       break;
4875   }
4876   return InitRequired;
4877 }
4878 
4879 
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
/// \param WithLastIter true if the destination task's 'liter' field must be
///        set from the 'lastpriv' argument (taskloop with lastprivates).
/// \return The newly created task duplication function.
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  // Signature: void(kmp_task_t_with_privates *dst,
  //                 kmp_task_t_with_privates *src, int lastpriv).
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  // This function is only generated when privates exist (see emitTaskInit).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    // Firstprivates are copied from the *source* task's shareds block.
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
4958 
4959 /// Checks if destructor function is required to be generated.
4960 /// \return true if cleanups are required, false otherwise.
4961 static bool
4962 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
4963   bool NeedsCleanup = false;
4964   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4965   const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
4966   for (const FieldDecl *FD : PrivateRD->fields()) {
4967     NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
4968     if (NeedsCleanup)
4969       break;
4970   }
4971   return NeedsCleanup;
4972 }
4973 
/// Common task-creation codegen: collects and alignment-sorts the private
/// copies, builds the kmp_task_t(_with_privates) record types, emits the
/// proxy task entry and helper functions (privates map, task_dup, destructor),
/// allocates the task via the runtime, copies shareds and initializes
/// privates, then fills the destructor/priority fields of the descriptor.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally carry the element-init helper variable.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Descending alignment order; stable so equal-alignment entries keep source
  // order (the .privates. record fields are laid out in this order).
  llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
    return L.first > R.first;
  });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). The taskloop variant is cached
  // separately from the task/target variant.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates-map parameter is the 4th argument of the task function.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    // No privates: pass a null mapping function.
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // 'final' may be a runtime expression (pointer set) or a compile-time
  // constant (int of the PointerIntPair).
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_target_task_alloc), AllocArgs);
  } else {
    NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // View the runtime's kmp_task_t* as our kmp_task_t_with_privates*.
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops that split iterations need a task_dup function to re-run
    // lastprivate setup and non-trivial constructor inits per chunk.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
5215 
namespace {
/// Dependence kind for RTL.
// NOTE(review): these values presumably mirror the kmp_depend_info flag
// encoding in the OpenMP runtime (kmp.h) — confirm before changing.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Fields ids in kmp_depend_info record.
/// Order must match the fields built by getDependTypes:
/// base_addr (intptr), len (size_t), flags.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace
5226 
5227 /// Translates internal dependency kind into the runtime kind.
5228 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
5229   RTLDependenceKindTy DepKind;
5230   switch (K) {
5231   case OMPC_DEPEND_in:
5232     DepKind = DepIn;
5233     break;
5234   // Out and InOut dependencies must use the same code.
5235   case OMPC_DEPEND_out:
5236   case OMPC_DEPEND_inout:
5237     DepKind = DepInOut;
5238     break;
5239   case OMPC_DEPEND_mutexinoutset:
5240     DepKind = DepMutexInOutSet;
5241     break;
5242   case OMPC_DEPEND_source:
5243   case OMPC_DEPEND_sink:
5244   case OMPC_DEPEND_depobj:
5245   case OMPC_DEPEND_unknown:
5246     llvm_unreachable("Unknown task dependence type");
5247   }
5248   return DepKind;
5249 }
5250 
5251 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
5252 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
5253                            QualType &FlagsTy) {
5254   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
5255   if (KmpDependInfoTy.isNull()) {
5256     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
5257     KmpDependInfoRD->startDefinition();
5258     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
5259     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
5260     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
5261     KmpDependInfoRD->completeDefinition();
5262     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
5263   }
5264 }
5265 
// Returns the number of dependencies stored in a depobj and an lvalue for the
// first kmp_depend_info element. The count is read from the 'base_addr' field
// of the element *preceding* the array (GEP at index -1), where it was stored
// when the depobj was created.
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  // Load the void* stored in the depobj variable.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  // Reinterpret it as kmp_depend_info*.
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // Step back one element to the header slot that precedes the array.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
      Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
5294 
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// RAII scope that emits one counted loop per iterator declared in an
/// 'iterator(...)' modifier. The constructor emits the loop heads (counter
/// init, condition check, branch into the body) so that code emitted while
/// the scope is alive runs once per point of the iteration space; the
/// destructor emits the matching loop tails (counter increment, back-branch,
/// exit block) in reverse order. Iterator variables and their counters are
/// privatized via the inherited OMPPrivateScope.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // Per-iterator jump targets: ContDests[i] is the condition-check block of
  // loop i, ExitDests[i] its exit block.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  /// \param E Iterator expression to expand, or null for a no-op scope.
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    // Evaluate all upper bounds up front, before the loops are entered, so
    // they are computed exactly once.
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      // Private copy of the iterator variable itself.
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      // Private copy of the helper counter that drives the loop.
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    // Emit the head of each (nested) loop, outermost first.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Pick a signed or unsigned comparison to match the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  /// Emits the loop tails, innermost loop first (reverse of construction).
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:  (mark the outermost exit block as finished).
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
5373 
/// Fills kmp_depend_info entries in \p DependenciesArray for every dependency
/// expression of a single depend clause.
///
/// \param Pos Either a compile-time slot index (unsigned*) when the position
///        in the array is statically known, or an lvalue (LValue*) holding a
///        runtime index when an iterator modifier makes the count dynamic.
///        The position is advanced past each emitted entry.
/// If the clause has an iterator modifier, the body below is emitted inside
/// the generated iterator loops and thus runs once per iteration point.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
    // Compute the start address of the dependency object.
    llvm::Value *Addr;
    if (OASE) {
      const Expr *Base = OASE->getBase();
      Addr = CGF.EmitScalarExpr(Base);
    } else {
      Addr = CGF.EmitLValue(E).getPointer(CGF);
    }
    // Compute its size in bytes.
    llvm::Value *Size;
    QualType Ty = E->getType();
    if (OASE) {
      // Array shaping: element size times the product of all dimensions.
      Size = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OASE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
    } else if (const auto *ASE =
                   dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
      // Array section: size = (&section-end + 1) - &section-begin, computed
      // via pointer-to-integer arithmetic.
      LValue UpAddrLVal =
          CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
      llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
          UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
      llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGM.SizeTy);
      llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
      Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
    } else {
      Size = CGF.getTypeSize(Ty);
    }
    // Address the destination slot, either at a static or a runtime index.
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
                  DependenciesArray.getAlignment()),
          KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the position: bump the static counter at compile time, or
    // emit an increment of the runtime index.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
5460 
/// Emits code computing, for each depobj expression in \p Data, the total
/// number of kmp_depend_info elements it contributes, and returns one
/// llvm::Value per expression.
///
/// Counts are accumulated in stack temporaries because, with an iterator
/// modifier, the per-expression code below is emitted inside the generated
/// iterator loops and runs once per iteration; the temporaries are
/// initialized once at the alloca point (InitTempAlloca) and the final
/// totals are loaded only after the iterator scope has been closed.
static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      // Load the void* stored in the depobj and view it as kmp_depend_info*.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      // The element count lives in the header entry at deps[-1].
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      // Accumulate into a zero-initialized temporary (see function comment).
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.InitTempAlloca(NumLVal.getAddress(CGF),
                         llvm::ConstantInt::get(CGF.IntPtrTy, 0));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // Iterator loops (if any) are closed now; read the final totals.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}
5518 
/// Emits code copying the kmp_depend_info records of every depobj in \p Data
/// into \p DependenciesArray via memcpy, starting at the runtime index held
/// in \p PosLVal and advancing it by each depobj's element count.
static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  // Size of one kmp_depend_info record, used to scale the memcpy length.
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      // Load the void* stored in the depobj and view it as kmp_depend_info*.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // memcopy dependency data: copy NumDeps records into the destination
      // array at the current runtime position.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr =
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
                  DependenciesArray.getAlignment());
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += size;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}
5579 
5580 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
5581     CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
5582     SourceLocation Loc) {
5583   if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
5584         return D.DepExprs.empty();
5585       }))
5586     return std::make_pair(nullptr, Address::invalid());
5587   // Process list of dependencies.
5588   ASTContext &C = CGM.getContext();
5589   Address DependenciesArray = Address::invalid();
5590   llvm::Value *NumOfElements = nullptr;
5591   unsigned NumDependencies = std::accumulate(
5592       Dependencies.begin(), Dependencies.end(), 0,
5593       [](unsigned V, const OMPTaskDataTy::DependData &D) {
5594         return D.DepKind == OMPC_DEPEND_depobj
5595                    ? V
5596                    : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
5597       });
5598   QualType FlagsTy;
5599   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5600   bool HasDepobjDeps = false;
5601   bool HasRegularWithIterators = false;
5602   llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
5603   llvm::Value *NumOfRegularWithIterators =
5604       llvm::ConstantInt::get(CGF.IntPtrTy, 1);
5605   // Calculate number of depobj dependecies and regular deps with the iterators.
5606   for (const OMPTaskDataTy::DependData &D : Dependencies) {
5607     if (D.DepKind == OMPC_DEPEND_depobj) {
5608       SmallVector<llvm::Value *, 4> Sizes =
5609           emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
5610       for (llvm::Value *Size : Sizes) {
5611         NumOfDepobjElements =
5612             CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
5613       }
5614       HasDepobjDeps = true;
5615       continue;
5616     }
5617     // Include number of iterations, if any.
5618     if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
5619       for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
5620         llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
5621         Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
5622         NumOfRegularWithIterators =
5623             CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
5624       }
5625       HasRegularWithIterators = true;
5626       continue;
5627     }
5628   }
5629 
5630   QualType KmpDependInfoArrayTy;
5631   if (HasDepobjDeps || HasRegularWithIterators) {
5632     NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
5633                                            /*isSigned=*/false);
5634     if (HasDepobjDeps) {
5635       NumOfElements =
5636           CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
5637     }
5638     if (HasRegularWithIterators) {
5639       NumOfElements =
5640           CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
5641     }
5642     OpaqueValueExpr OVE(Loc,
5643                         C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
5644                         VK_RValue);
5645     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
5646                                                   RValue::get(NumOfElements));
5647     KmpDependInfoArrayTy =
5648         C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
5649                                /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
5650     // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
5651     // Properly emit variable-sized array.
5652     auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
5653                                          ImplicitParamDecl::Other);
5654     CGF.EmitVarDecl(*PD);
5655     DependenciesArray = CGF.GetAddrOfLocalVar(PD);
5656     NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
5657                                               /*isSigned=*/false);
5658   } else {
5659     KmpDependInfoArrayTy = C.getConstantArrayType(
5660         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
5661         ArrayType::Normal, /*IndexTypeQuals=*/0);
5662     DependenciesArray =
5663         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
5664     DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
5665     NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
5666                                            /*isSigned=*/false);
5667   }
5668   unsigned Pos = 0;
5669   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
5670     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
5671         Dependencies[I].IteratorExpr)
5672       continue;
5673     emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
5674                    DependenciesArray);
5675   }
5676   // Copy regular dependecies with iterators.
5677   LValue PosLVal = CGF.MakeAddrLValue(
5678       CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
5679   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
5680   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
5681     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
5682         !Dependencies[I].IteratorExpr)
5683       continue;
5684     emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
5685                    DependenciesArray);
5686   }
5687   // Copy final depobj arrays without iterators.
5688   if (HasDepobjDeps) {
5689     for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
5690       if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
5691         continue;
5692       emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
5693                          DependenciesArray);
5694     }
5695   }
5696   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5697       DependenciesArray, CGF.VoidPtrTy);
5698   return std::make_pair(NumOfElements, DependenciesArray);
5699 }
5700 
/// Emits the dependency array backing an 'omp depobj' construct and returns
/// its address, or an invalid address if the clause is empty.
///
/// The array is heap-allocated via __kmpc_alloc with one extra leading
/// element: deps[0].base_addr stores the number of payload elements (needed
/// to implement 'depobj(x) update(in)'), and the returned address points at
/// deps[1], the first payload element. getDepobjElements and
/// emitDestroyClause rely on this layout.
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // With an iterator modifier the element count is the product of the
    // iterator upper bounds, computed at run time.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    // Allocation size = (count + 1 header element) * aligned record size.
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Static count: size the allocation as a constant array with the extra
    // header element.
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_alloc), Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address(Addr, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // Fill payload elements starting at slot 1; a runtime position lvalue is
  // needed when an iterator modifier makes the count dynamic.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return the address of the first payload element (past the header).
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
  return DependenciesArray;
}
5781 
5782 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
5783                                         SourceLocation Loc) {
5784   ASTContext &C = CGM.getContext();
5785   QualType FlagsTy;
5786   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5787   LValue Base = CGF.EmitLoadOfPointerLValue(
5788       DepobjLVal.getAddress(CGF),
5789       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5790   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5791   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5792       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
5793   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5794       Addr.getPointer(),
5795       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5796   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
5797                                                                CGF.VoidPtrTy);
5798   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5799   // Use default allocator.
5800   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5801   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5802 
5803   // _kmpc_free(gtid, addr, nullptr);
5804   (void)CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_free), Args);
5805 }
5806 
/// Emits code implementing 'depobj(x) update(<kind>)': rewrites the flags
/// field of every kmp_depend_info element stored in the depobj to the
/// runtime encoding of \p NewDepKind.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  // Fetch the element count and the first element of the depobj array.
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop: a PHI node tracks the
  // current element, starting at Begin and advancing by one per iteration
  // until it reaches End.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Address(ElementPHI, Begin.getAlignment());
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5852 
/// Emits code for an OpenMP task directive: allocates and initializes the
/// task object, materializes its dependency array, and either enqueues the
/// task (__kmpc_omp_task / __kmpc_omp_task_with_deps) or, under a false
/// 'if' clause, executes it immediately in a begin_if0/complete_if0 bracket
/// after waiting for its dependencies.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  // Allocate and initialize the kmp_task_t object.
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // Then-branch of the 'if' clause (or the unconditional path): enqueue the
  // task with the runtime.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied tasks start with part_id = 0.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // Else-branch of the 'if' clause: execute the task body immediately on
  // this thread (undeferred task).
  auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
                        &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
5963 
/// Emits a call to the '__kmpc_taskloop' OpenMP runtime entry for the
/// taskloop-based directive \p D.
///
/// The kmp_task_t object itself is allocated and initialized by emitTaskInit();
/// this routine then fills in the loop-specific fields of the task object
/// (lower bound, upper bound, stride and reductions) and issues the runtime
/// call that schedules the task loop.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  // Allocate and initialize the kmp_task_t object for this task loop.
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // 'if_val' argument: the evaluated 'if' clause condition widened to int, or
  // the constant 1 when no 'if' clause is present.
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the task object's lower-bound, upper-bound and stride fields
  // from the corresponding loop variables' initializers.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // Encoding of the 'sched' runtime argument: 0 - no schedule clause,
  // 1 - grainsize clause, 2 - num_tasks clause.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      // grainsize/num_tasks value (zero-extended to 64 bits), or 0 when no
      // schedule clause was given.
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
}
6047 
6048 /// Emit reduction operation for each element of array (required for
6049 /// array sections) LHS op = RHS.
6050 /// \param Type Type of array.
6051 /// \param LHSVar Variable on the left side of the reduction operation
6052 /// (references element of array in original variable).
6053 /// \param RHSVar Variable on the right side of the reduction operation
6054 /// (references element of array in original variable).
6055 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
6056 /// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely for zero-length arrays (begin == end).
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes track the current source/destination element; the second
  // incoming value (the incremented pointer) is added after the body below.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Temporarily privatize LHSVar/RHSVar so that RedOpGen's expressions resolve
  // to the current array elements rather than the whole arrays.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
6127 
6128 /// Emit reduction combiner. If the combiner is a simple expression emit it as
6129 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
6130 /// UDR combiner function.
6131 static void emitReductionCombiner(CodeGenFunction &CGF,
6132                                   const Expr *ReductionOp) {
6133   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
6134     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
6135       if (const auto *DRE =
6136               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
6137         if (const auto *DRD =
6138                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
6139           std::pair<llvm::Function *, llvm::Function *> Reduction =
6140               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
6141           RValue Func = RValue::get(Reduction.first);
6142           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
6143           CGF.EmitIgnoredExpr(ReductionOp);
6144           return;
6145         }
6146   CGF.EmitIgnoredExpr(ReductionOp);
6147 }
6148 
/// Emits the outlined function 'void reduction_func(void *LHSArg, void *RHSArg)'
/// passed to __kmpc_reduce{_nowait}. Both arguments are arrays of void*
/// pointers (of type \p ArgsType); element I points at the I-th reduction
/// item, and for variably-modified items the following slot holds the VLA
/// size. The generated body applies each reduction operation in
/// \p ReductionOps to the corresponding LHS/RHS elements.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Privatize the LHS/RHS variables so the reduction expressions below
  // resolve to the pointers loaded from the argument arrays.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // The size lives in the extra array slot following the item itself; map
      // it onto the VLA's opaque size expression before emitting the type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
6240 
6241 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
6242                                                   const Expr *ReductionOp,
6243                                                   const Expr *PrivateRef,
6244                                                   const DeclRefExpr *LHS,
6245                                                   const DeclRefExpr *RHS) {
6246   if (PrivateRef->getType()->isArrayType()) {
6247     // Emit reduction for array section.
6248     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
6249     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
6250     EmitOMPAggregateReduction(
6251         CGF, PrivateRef->getType(), LHSVar, RHSVar,
6252         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
6253           emitReductionCombiner(CGF, ReductionOp);
6254         });
6255   } else {
6256     // Emit reduction for array subscript or single variable.
6257     emitReductionCombiner(CGF, ReductionOp);
6258   }
6259 }
6260 
/// Emits code for an OpenMP 'reduction' clause: builds the reduction list,
/// the outlined reduce_func and the __kmpc_reduce{_nowait} call, then a
/// switch over its result selecting the non-atomic (case 1) or atomic
/// (case 2) combination path. With Options.SimpleReduction only the plain
/// combiners are emitted (no runtime calls). The full generated shape is
/// spelled out in the comment block below.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      // The VLA element count is smuggled through the void* slot as an
      // inttoptr value; reduce_func reads it back with ptrtoint.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
                                       : OMPRTL__kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  // Non-atomic path: apply each combiner sequentially; the CommonActionTy
  // below appends the matching __kmpc_end_reduce{_nowait} call on exit.
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
                                       : OMPRTL__kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // Decompose 'x = <update expr>' so it can be emitted as a simple
      // atomic update; anything else falls back to a critical region.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Materialize the loaded value of X into a temporary bound to
                // VD so that UpExpr reads it instead of the shared variable.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          createRuntimeFunction(OMPRTL__kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
6564 
6565 /// Generates unique name for artificial threadprivate variables.
6566 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
6567 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
6568                                       const Expr *Ref) {
6569   SmallString<256> Buffer;
6570   llvm::raw_svector_ostream Out(Buffer);
6571   const clang::DeclRefExpr *DE;
6572   const VarDecl *D = ::getBaseDecl(Ref, DE);
6573   if (!D)
6574     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
6575   D = D->getCanonicalDecl();
6576   std::string Name = CGM.getOpenMPRuntime().getName(
6577       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
6578   Out << Prefix << Name << "_"
6579       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
6580   return std::string(Out.str());
6581 }
6582 
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
///
/// \param RCG Reduction codegen helper describing the reduction items.
/// \param N Index of the reduction item this initializer is emitted for.
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // Both parameters are 'void *restrict'.
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue OrigLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer); otherwise pass a null pointer lvalue.
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    OrigLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
6651 
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  // The variables referenced by the LHS/RHS expressions; they are remapped to
  // the function arguments in the private scope below.
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  // %arg0 - in/out item; the combined value is stored back through it.
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  // %arg1 - input item.
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
6729 
/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
/// Returns nullptr (and emits nothing) if the reduction item needs no
/// cleanups.
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // No finalizer is required for items without cleanups.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // %arg - pointer to the private copy to be destroyed.
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Dereference the argument to get the address of the item to finalize.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}
6778 
6779 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
6780     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
6781     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
6782   if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
6783     return nullptr;
6784 
6785   // Build typedef struct:
6786   // kmp_taskred_input {
6787   //   void *reduce_shar; // shared reduction item
6788   //   void *reduce_orig; // original reduction item used for initialization
6789   //   size_t reduce_size; // size of data item
6790   //   void *reduce_init; // data initialization routine
6791   //   void *reduce_fini; // data finalization routine
6792   //   void *reduce_comb; // data combiner routine
6793   //   kmp_task_red_flags_t flags; // flags for additional info from compiler
6794   // } kmp_taskred_input_t;
6795   ASTContext &C = CGM.getContext();
6796   RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
6797   RD->startDefinition();
6798   const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6799   const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6800   const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
6801   const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6802   const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6803   const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6804   const FieldDecl *FlagsFD = addFieldToRecordDecl(
6805       C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
6806   RD->completeDefinition();
6807   QualType RDType = C.getRecordType(RD);
6808   unsigned Size = Data.ReductionVars.size();
6809   llvm::APInt ArraySize(/*numBits=*/64, Size);
6810   QualType ArrayRDType = C.getConstantArrayType(
6811       RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
6812   // kmp_task_red_input_t .rd_input.[Size];
6813   Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
6814   ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
6815                        Data.ReductionCopies, Data.ReductionOps);
6816   for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
6817     // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
6818     llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
6819                            llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
6820     llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
6821         TaskRedInput.getPointer(), Idxs,
6822         /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
6823         ".rd_input.gep.");
6824     LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
6825     // ElemLVal.reduce_shar = &Shareds[Cnt];
6826     LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
6827     RCG.emitSharedOrigLValue(CGF, Cnt);
6828     llvm::Value *CastedShared =
6829         CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
6830     CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
6831     // ElemLVal.reduce_orig = &Origs[Cnt];
6832     LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
6833     llvm::Value *CastedOrig =
6834         CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
6835     CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
6836     RCG.emitAggregateType(CGF, Cnt);
6837     llvm::Value *SizeValInChars;
6838     llvm::Value *SizeVal;
6839     std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
6840     // We use delayed creation/initialization for VLAs and array sections. It is
6841     // required because runtime does not provide the way to pass the sizes of
6842     // VLAs/array sections to initializer/combiner/finalizer functions. Instead
6843     // threadprivate global variables are used to store these values and use
6844     // them in the functions.
6845     bool DelayedCreation = !!SizeVal;
6846     SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
6847                                                /*isSigned=*/false);
6848     LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
6849     CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
6850     // ElemLVal.reduce_init = init;
6851     LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
6852     llvm::Value *InitAddr =
6853         CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
6854     CGF.EmitStoreOfScalar(InitAddr, InitLVal);
6855     // ElemLVal.reduce_fini = fini;
6856     LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
6857     llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
6858     llvm::Value *FiniAddr = Fini
6859                                 ? CGF.EmitCastToVoidPtr(Fini)
6860                                 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
6861     CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
6862     // ElemLVal.reduce_comb = comb;
6863     LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
6864     llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
6865         CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
6866         RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
6867     CGF.EmitStoreOfScalar(CombAddr, CombLVal);
6868     // ElemLVal.flags = 0;
6869     LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
6870     if (DelayedCreation) {
6871       CGF.EmitStoreOfScalar(
6872           llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
6873           FlagsLVal);
6874     } else
6875       CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
6876                                  FlagsLVal.getType());
6877   }
6878   if (Data.IsReductionWithTaskMod) {
6879     // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6880     // is_ws, int num, void *data);
6881     llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6882     llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6883                                                   CGM.IntTy, /*isSigned=*/true);
6884     llvm::Value *Args[] = {
6885         IdentTLoc, GTid,
6886         llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
6887                                /*isSigned=*/true),
6888         llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6889         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6890             TaskRedInput.getPointer(), CGM.VoidPtrTy)};
6891     return CGF.EmitRuntimeCall(
6892         createRuntimeFunction(OMPRTL__kmpc_taskred_modifier_init), Args);
6893   }
6894   // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
6895   llvm::Value *Args[] = {
6896       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6897                                 /*isSigned=*/true),
6898       llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6899       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
6900                                                       CGM.VoidPtrTy)};
6901   return CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskred_init),
6902                              Args);
6903 }
6904 
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc,
  // int gtid, int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_task_reduction_modifier_fini), Args);
}
6920 
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second != nullptr): the runtime-computed size is stored there so
  // that the generated init/comb/fini functions can load it back.
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}
6937 
6938 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6939                                               SourceLocation Loc,
6940                                               llvm::Value *ReductionsPtr,
6941                                               LValue SharedLVal) {
6942   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6943   // *d);
6944   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6945                                                    CGM.IntTy,
6946                                                    /*isSigned=*/true),
6947                          ReductionsPtr,
6948                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6949                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6950   return Address(
6951       CGF.EmitRuntimeCall(
6952           createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
6953       SharedLVal.getAlignment());
6954 }
6955 
6956 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6957                                        SourceLocation Loc) {
6958   if (!CGF.HaveInsertPoint())
6959     return;
6960 
6961   llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
6962   if (OMPBuilder) {
6963     OMPBuilder->CreateTaskwait(CGF.Builder);
6964   } else {
6965     // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6966     // global_tid);
6967     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6968     // Ignore return result until untied tasks are supported.
6969     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
6970   }
6971 
6972   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6973     Region->emitUntiedSwitch(CGF);
6974 }
6975 
6976 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6977                                            OpenMPDirectiveKind InnerKind,
6978                                            const RegionCodeGenTy &CodeGen,
6979                                            bool HasCancel) {
6980   if (!CGF.HaveInsertPoint())
6981     return;
6982   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
6983   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6984 }
6985 
namespace {
/// Cancellation kinds; the value is emitted directly as the cncl_kind
/// argument of the __kmpc_cancel/__kmpc_cancellationpoint runtime calls.
enum RTCancelKind {
  CancelNoreq = 0,     // No cancellation requested.
  CancelParallel = 1,  // Cancel a 'parallel' region.
  CancelLoop = 2,      // Cancel a worksharing loop ('for') region.
  CancelSections = 3,  // Cancel a 'sections' region.
  CancelTaskgroup = 4  // Cancel a 'taskgroup' region.
};
} // anonymous namespace
6995 
6996 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6997   RTCancelKind CancelKind = CancelNoreq;
6998   if (CancelRegion == OMPD_parallel)
6999     CancelKind = CancelParallel;
7000   else if (CancelRegion == OMPD_for)
7001     CancelKind = CancelLoop;
7002   else if (CancelRegion == OMPD_sections)
7003     CancelKind = CancelSections;
7004   else {
7005     assert(CancelRegion == OMPD_taskgroup);
7006     CancelKind = CancelTaskgroup;
7007   }
7008   return CancelKind;
7009 }
7010 
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
      // Branch on the runtime's answer:
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct; branching through cleanups runs any pending
      // destructors on the way out of the region.
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
7045 
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // The actual cancel emission; reused below for both the conditional (with
    // 'if' clause) and unconditional forms.
    auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
                                                        PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
      // Branch on the runtime's answer:
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct; branching through cleanups runs any pending
      // destructors on the way out of the region.
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // With an 'if' clause the cancel is guarded; the else-branch is a no-op.
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
7087 
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  // Remember that this module emitted at least one target region, then
  // delegate the actual outlining to the shared helper.
  HasEmittedTargetRegion = true;
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}
7097 
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Outline the captured target region into a function with the name built
  // above.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. In the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can retrieved from
  // the offloading entry and launched by the runtime library. We also mark the
  // outlined function to have external linkage in case we are emitting code for
  // the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    // Device side: the ID is the (bitcast) function address itself.
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
  } else {
    // Host side: the ID is a dedicated unique global, independent of the
    // outlined function.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}
7164 
7165 /// Checks if the expression is constant or does not have non-trivial function
7166 /// calls.
7167 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
7168   // We can skip constant expressions.
7169   // We can skip expressions with trivial calls or simple expressions.
7170   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
7171           !E->hasNonTrivialCall(Ctx)) &&
7172          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
7173 }
7174 
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  // Peel off containers, then descend through nested compound statements as
  // long as each one holds exactly one "significant" child; returns nullptr
  // when a compound has more than one significant child.
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
              // Declarations with no runtime effect are ignorable.
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              // A variable is ignorable if it is constexpr, or has a trivial
              // (or reference) type with no initializer or a trivial one.
              return VD->isConstexpr() ||
                     ((VD->getType().isTrivialType(Ctx) ||
                       VD->getType()->isReferenceType()) &&
                      (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
7219 
/// Emit the number of teams for a target directive.  Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit a team of size one for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
///
/// \param CGF The host-side CodeGenFunction the value is emitted into.
/// \param D   A target-based executable directive.
/// \return An i32 value: the evaluated num_teams expression, 1 for
///         single-team constructs, 0 to let the runtime choose, or nullptr
///         when nothing can be determined statically (plain 'target' with no
///         recognizable nested directive).
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': look at the single child statement of the captured
    // region for a closely nested directive that fixes the number of teams.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          // Evaluate the nested teams directive's num_teams expression in
          // the context of the enclosing target's captured statement.
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        // Nested teams without num_teams: 0 lets the runtime pick a default.
        return Bld.getInt32(0);
      }
      // A nested parallel or simd region executes with a single team.
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      return Bld.getInt32(0);
    }
    // No nested directive found: the team count cannot be determined here.
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target+teams constructs: the num_teams clause, if present,
    // is attached directly to the directive itself.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // No associated teams construct: exactly one team.
    return Bld.getInt32(1);
  // None of the remaining directive kinds is a target-based executable
  // directive, so reaching any of them here is a front-end invariant
  // violation (diagnosed by the unreachable below).
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
7350 
/// Compute the number of threads for an inner 'parallel' region that is the
/// single child of the captured statement \p CS.
///
/// If the child is a parallel directive, the result is derived from its
/// 'if' and 'num_threads' clauses, clamped to \p DefaultThreadLimitVal when
/// that is non-null. If the child is a simd directive, a single thread is
/// used. Otherwise \p DefaultThreadLimitVal is returned as-is (which may be
/// null for a bare child, or replaced by 0 — "runtime default" — when there
/// is no child directive at all).
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        // Pick the 'if' clause that applies to 'parallel' (either an
        // unmodified 'if' or one with the 'parallel' name modifier).
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Condition folds to a constant: a false condition serializes
            // the parallel region — exactly one thread.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Emit any helper variables the condition's pre-init statement
            // declares before evaluating the condition itself.
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp to the default thread limit: min(limit, num_threads).
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    // A simd region runs on a single thread.
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  // No child directive: use the provided limit or 0 (runtime default).
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}
7442 
/// Emit the number of threads for a target directive.  Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
///
/// \param CGF The host-side CodeGenFunction the value is emitted into.
/// \param D   A target-based executable directive.
/// \return An i32 value; 0 means "let the runtime choose".
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': first see whether a directly nested parallel/simd
    // region determines the thread count on its own.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        // Evaluate the nested directive's thread_limit expression in the
        // context of the enclosing target's captured statement.
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // For a nested non-distribute teams region, descend one more level to
      // inspect what is nested inside the teams construct.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      // A simd region runs on a single thread.
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // Look through a nested 'distribute' for an inner parallel region.
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      // Pick the 'if' clause that applies to 'parallel' (either an
      // unmodified 'if' or one with the 'parallel' name modifier).
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // A constant-false condition serializes the region: one thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Combine with thread_limit: min(num_threads, thread_limit).
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    // Simd-only target regions run on a single thread.
    return Bld.getInt32(1);
  // None of the remaining directive kinds is a target-based executable
  // directive, so reaching any of them here is a front-end invariant
  // violation (diagnosed by the unreachable below).
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7665 
7666 namespace {
7667 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7668 
7669 // Utility to handle information from clauses associated with a given
7670 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7671 // It provides a convenient interface to obtain the information and generate
7672 // code for that information.
7673 class MappableExprsHandler {
7674 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  ///
  /// NOTE(review): these values appear to mirror the map-type flags consumed
  /// by the offloading runtime — confirm against the libomptarget interface
  /// before changing any of them.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7715 
7716   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7717   static unsigned getFlagMemberOffset() {
7718     unsigned Offset = 0;
7719     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7720          Remain = Remain >> 1)
7721       Offset++;
7722     return Offset;
7723   }
7724 
  /// Class that associates information with a base pointer to be passed to the
  /// runtime library.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    /// Access the wrapped base pointer.
    llvm::Value *operator*() const { return Ptr; }
    /// Return the declaration associated with this device pointer, if any.
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    /// Associate a declaration with this device pointer.
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };
7741 
  // Array types used to collect the per-entry mapping information that is
  // eventually handed to the offloading runtime.
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;

  /// Map between a struct and the its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Field index and address of the lowest mapped element of the struct.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Field index and address of the highest mapped element of the struct.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Base address of the struct itself.
    Address Base = Address::invalid();
  };
7757 
7758 private:
  /// Mapping information extracted from a single component list of a
  /// map-like clause: the components themselves, the map type and its
  /// modifiers, whether the device pointer must be returned for this entry,
  /// and whether the map is implicit.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
  };
7776 
  /// If use_device_ptr is used on a pointer which is a struct member and there
  /// is no map information about it, then emission of that entry is deferred
  /// until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    /// The member expression whose emission was deferred.
    const Expr *IE = nullptr;
    /// The declaration of the use_device_ptr pointer.
    const ValueDecl *VD = nullptr;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
        : IE(IE), VD(VD) {}
  };
7787 
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;
7808 
  /// Return the size, in bytes, of the storage that has to be mapped for the
  /// expression \p E, as a runtime llvm::Value.
  ///
  /// Handles array shaping expressions and array sections specially; for any
  /// other expression the size of its (canonical, deref'd-through-reference)
  /// type is used.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      // Size = sizeof(pointee) * product of all dimension extents.
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and lower bound is
      // not specified too, that means we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLoc().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      // Element size comes from the pointee for a pointer base, or the
      // element type for an array base.
      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLoc().isInvalid())
        return ElemSize;

      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLoc().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      // Clamp the result to zero when the lower-bound offset exceeds the
      // total size of the base.
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }
7883 
7884   /// Return the corresponding bits for a given map clause modifier. Add
7885   /// a flag marking the map as a pointer if requested. Add a flag marking the
7886   /// map as the first one of a series of maps that relate to the same map
7887   /// expression.
7888   OpenMPOffloadMappingFlags getMapTypeBits(
7889       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7890       bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
7891     OpenMPOffloadMappingFlags Bits =
7892         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7893     switch (MapType) {
7894     case OMPC_MAP_alloc:
7895     case OMPC_MAP_release:
7896       // alloc and release is the default behavior in the runtime library,  i.e.
7897       // if we don't pass any bits alloc/release that is what the runtime is
7898       // going to do. Therefore, we don't need to signal anything for these two
7899       // type modifiers.
7900       break;
7901     case OMPC_MAP_to:
7902       Bits |= OMP_MAP_TO;
7903       break;
7904     case OMPC_MAP_from:
7905       Bits |= OMP_MAP_FROM;
7906       break;
7907     case OMPC_MAP_tofrom:
7908       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7909       break;
7910     case OMPC_MAP_delete:
7911       Bits |= OMP_MAP_DELETE;
7912       break;
7913     case OMPC_MAP_unknown:
7914       llvm_unreachable("Unexpected map type!");
7915     }
7916     if (AddPtrFlag)
7917       Bits |= OMP_MAP_PTR_AND_OBJ;
7918     if (AddIsTargetParamFlag)
7919       Bits |= OMP_MAP_TARGET_PARAM;
7920     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7921         != MapModifiers.end())
7922       Bits |= OMP_MAP_ALWAYS;
7923     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7924         != MapModifiers.end())
7925       Bits |= OMP_MAP_CLOSE;
7926     return Bits;
7927   }
7928 
7929   /// Return true if the provided expression is a final array section. A
7930   /// final array section, is one whose length can't be proved to be one.
7931   bool isFinalArraySectionExpression(const Expr *E) const {
7932     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7933 
7934     // It is not an array section and therefore not a unity-size one.
7935     if (!OASE)
7936       return false;
7937 
7938     // An array section with no colon always refer to a single element.
7939     if (OASE->getColonLoc().isInvalid())
7940       return false;
7941 
7942     const Expr *Length = OASE->getLength();
7943 
7944     // If we don't have a length we have to check if the array has size 1
7945     // for this dimension. Also, we should always expect a length if the
7946     // base type is pointer.
7947     if (!Length) {
7948       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7949                              OASE->getBase()->IgnoreParenImpCasts())
7950                              .getCanonicalType();
7951       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7952         return ATy->getSize().getSExtValue() != 1;
7953       // If we don't have a constant dimension length, we have to consider
7954       // the current section as having any size, so it is not necessarily
7955       // unitary. If it happen to be unity size, that's user fault.
7956       return true;
7957     }
7958 
7959     // Check if the length evaluates to 1.
7960     Expr::EvalResult Result;
7961     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7962       return true; // Can have more that size 1.
7963 
7964     llvm::APSInt ConstLength = Result.Val.getInt();
7965     return ConstLength.getSExtValue() != 1;
7966   }
7967 
  /// Generate the base pointers, section pointers, sizes and map type
  /// bits for the provided map type, map modifier, and expression components.
  /// \a IsFirstComponentList should be set to true if the provided set of
  /// components is the first associated with a capture.
  /// If \a OverlappedElements is non-empty, the base element of the list
  /// overlaps other mapped components and only the non-overlapped parts of
  /// the enclosing struct are emitted (bit-copied) here.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType,
      ArrayRef<OpenMPMapModifierKind> MapModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
      StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
      bool IsImplicit,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = llvm::None) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (*), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    // When the variable is on a declare target link or in a to clause with
    // unified memory, a reference is needed to hold the host/device address
    // of the variable.
    bool RequiresReference = false;

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
    const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);

    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
    } else if (OAShE &&
               isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
      // Array shaping of 'this': evaluate the base pointer directly.
      BP = Address(
          CGF.EmitScalarExpr(OAShE->getBase()),
          CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
            RequiresReference = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
          }
        }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());

        // We do not need to generate individual map information for the
        // pointer, it can be associated with the combined storage.
        ++I;
      }
    }

    // Track whether a component of the list should be marked as MEMBER_OF some
    // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
    // in a component list should be marked as MEMBER_OF, all subsequent entries
    // do not belong to the base struct. E.g.
    // struct S2 s;
    // s.ps->ps->ps->f[:]
    //   (1) (2) (3) (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
    // is the pointee of ps(2) which is not member of struct s, so it should not
    // be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a component
    // in the component list which is a member expression. Useful when we have a
    // pointer or a final array section, in which case it is the previous
    // component in the list which tells us whether we have a member expression.
    // E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells us
    // whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    for (; I != CE; ++I) {
      // If the current component is member of a struct (parent struct) mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME)
          ShouldBeMemberOf = true;
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section, is one whose length can't be proved to be one.
      bool IsFinalArraySection =
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      const auto *OAShE =
          dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
      const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
      const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
      bool IsPointer =
          OAShE ||
          (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();
      // A pointer expressed through a unary/binary operator is a dereference,
      // not a standalone pointer component.
      bool IsNonDerefPointer = IsPointer && !UO && !BO;

      if (Next == CE || IsNonDerefPointer || IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
                isa<UnaryOperator>(Next->getAssociatedExpression()) ||
                isa<BinaryOperator>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        Address LB = Address::invalid();
        if (OAShE) {
          LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
                       CGF.getContext().getTypeAlignInChars(
                           OAShE->getBase()->getType()));
        } else {
          LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
                   .getAddress(CGF);
        }

        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the object
        // it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointer =
            IsPointer && EncounteredME &&
            (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
             EncounteredME);
        if (!OverlappedElements.empty()) {
          // Handle base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(Next == CE &&
                 "Expected last element for the overlapped elements.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on the
          // device.
          PartialStruct.LowestElem = {0, LB};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
                                                              CGF.VoidPtrTy),
              TypeSize.getQuantity() - 1);
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          // Emit data for non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapModifiers, IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false);
          // Start the bit-copy regions at the base of the struct.
          LB = BP;
          llvm::Value *Size = nullptr;
          // Do bitcopy of all non-overlapped structure elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (MC.getAssociatedDeclaration()) {
                // The region to copy ends where the overlapped component
                // begins; its size is the distance from the current LB.
                ComponentLB =
                    CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                        .getAddress(CGF);
                Size = CGF.Builder.CreatePtrDiff(
                    CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
                    CGF.EmitCastToVoidPtr(LB.getPointer()));
                break;
              }
            }
            BasePointers.push_back(BP.getPointer());
            Pointers.push_back(LB.getPointer());
            Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty,
                                                      /*isSigned=*/true));
            Types.push_back(Flags);
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
          }
          // Emit the remaining region from the last overlapped element to the
          // end of the struct.
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Size = CGF.Builder.CreatePtrDiff(
              CGF.EmitCastToVoidPtr(
                  CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
              CGF.EmitCastToVoidPtr(LB.getPointer()));
          Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          Types.push_back(Flags);
          // The overlapped-elements path emits all entries for this list.
          break;
        }
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        if (!IsMemberPointer) {
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags = getMapTypeBits(
              MapType, MapModifiers, IsImplicit,
              !IsExpressionFirstInfo || RequiresReference,
              IsCaptureFirstInfo && !RequiresReference);

          if (!IsExpressionFirstInfo) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
            // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
            if (IsPointer)
              Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
                         OMP_MAP_DELETE | OMP_MAP_CLOSE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
              // should be later updated with the correct value of MEMBER_OF.
              Flags |= OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          Types.push_back(Flags);
        }

        // If we have encountered a member expression so far, keep track of the
        // mapped member. If the parent is "*this", then the value declaration
        // is nullptr.
        if (EncounteredME) {
          const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this struct
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LB};
            PartialStruct.HighestElem = {FieldIndex, LB};
            PartialStruct.Base = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LB};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            PartialStruct.HighestElem = {FieldIndex, LB};
          }
        }

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
      }
    }
  }
8425 
8426   /// Return the adjusted map modifiers if the declaration a capture refers to
8427   /// appears in a first-private clause. This is expected to be used only with
8428   /// directives that start with 'target'.
8429   MappableExprsHandler::OpenMPOffloadMappingFlags
8430   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8431     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8432 
8433     // A first private variable captured by reference will use only the
8434     // 'private ptr' and 'map to' flag. Return the right flags if the captured
8435     // declaration is known as first-private in this handler.
8436     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8437       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
8438           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
8439         return MappableExprsHandler::OMP_MAP_ALWAYS |
8440                MappableExprsHandler::OMP_MAP_TO;
8441       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8442         return MappableExprsHandler::OMP_MAP_TO |
8443                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8444       return MappableExprsHandler::OMP_MAP_PRIVATE |
8445              MappableExprsHandler::OMP_MAP_TO;
8446     }
8447     return MappableExprsHandler::OMP_MAP_TO |
8448            MappableExprsHandler::OMP_MAP_FROM;
8449   }
8450 
  /// Return the MEMBER_OF flag encoding the 1-based position (\a Position + 1)
  /// of the parent struct entry within the list of mapped arguments.
  static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
    // Shift the 1-based position left into the MEMBER_OF bit-field, which
    // starts at bit getFlagMemberOffset().
    return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
                                                  << getFlagMemberOffset());
  }
8456 
8457   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8458                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8459     // If the entry is PTR_AND_OBJ but has not been marked with the special
8460     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8461     // marked as MEMBER_OF.
8462     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8463         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8464       return;
8465 
8466     // Reset the placeholder value to prepare the flag for the assignment of the
8467     // proper MEMBER_OF value.
8468     Flags &= ~OMP_MAP_MEMBER_OF;
8469     Flags |= MemberOfFlag;
8470   }
8471 
8472   void getPlainLayout(const CXXRecordDecl *RD,
8473                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8474                       bool AsBase) const {
8475     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8476 
8477     llvm::StructType *St =
8478         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8479 
8480     unsigned NumElements = St->getNumElements();
8481     llvm::SmallVector<
8482         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8483         RecordLayout(NumElements);
8484 
8485     // Fill bases.
8486     for (const auto &I : RD->bases()) {
8487       if (I.isVirtual())
8488         continue;
8489       const auto *Base = I.getType()->getAsCXXRecordDecl();
8490       // Ignore empty bases.
8491       if (Base->isEmpty() || CGF.getContext()
8492                                  .getASTRecordLayout(Base)
8493                                  .getNonVirtualSize()
8494                                  .isZero())
8495         continue;
8496 
8497       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8498       RecordLayout[FieldIndex] = Base;
8499     }
8500     // Fill in virtual bases.
8501     for (const auto &I : RD->vbases()) {
8502       const auto *Base = I.getType()->getAsCXXRecordDecl();
8503       // Ignore empty bases.
8504       if (Base->isEmpty())
8505         continue;
8506       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8507       if (RecordLayout[FieldIndex])
8508         continue;
8509       RecordLayout[FieldIndex] = Base;
8510     }
8511     // Fill in all the fields.
8512     assert(!RD->isUnion() && "Unexpected union.");
8513     for (const auto *Field : RD->fields()) {
8514       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8515       // will fill in later.)
8516       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
8517         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8518         RecordLayout[FieldIndex] = Field;
8519       }
8520     }
8521     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8522              &Data : RecordLayout) {
8523       if (Data.isNull())
8524         continue;
8525       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
8526         getPlainLayout(Base, Layout, /*AsBase=*/true);
8527       else
8528         Layout.push_back(Data.get<const FieldDecl *>());
8529     }
8530   }
8531 
8532 public:
8533   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8534       : CurDir(&Dir), CGF(CGF) {
8535     // Extract firstprivate clause information.
8536     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8537       for (const auto *D : C->varlists())
8538         FirstPrivateDecls.try_emplace(
8539             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8540     // Extract device pointer clause information.
8541     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8542       for (auto L : C->component_lists())
8543         DevPointersMap[L.first].push_back(L.second);
8544   }
8545 
  /// Constructor for the declare mapper directive. Only records the mapper
  /// declaration in CurDir; no clause information is extracted here.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
8549 
8550   /// Generate code for the combined entry if we have a partially mapped struct
8551   /// and take care of the mapping flags of the arguments corresponding to
8552   /// individual struct members.
8553   void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
8554                          MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
8555                          MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
8556                          const StructRangeInfoTy &PartialStruct) const {
8557     // Base is the base of the struct
8558     BasePointers.push_back(PartialStruct.Base.getPointer());
8559     // Pointer is the address of the lowest element
8560     llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
8561     Pointers.push_back(LB);
8562     // Size is (addr of {highest+1} element) - (addr of lowest element)
8563     llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
8564     llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
8565     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8566     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8567     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
8568     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8569                                                   /*isSigned=*/false);
8570     Sizes.push_back(Size);
8571     // Map type is always TARGET_PARAM
8572     Types.push_back(OMP_MAP_TARGET_PARAM);
8573     // Remove TARGET_PARAM flag from the first element
8574     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
8575 
8576     // All other current entries will be MEMBER_OF the combined entry
8577     // (except for PTR_AND_OBJ entries which do not have a placeholder value
8578     // 0xFFFF in the MEMBER_OF field).
8579     OpenMPOffloadMappingFlags MemberOfFlag =
8580         getMemberOfFlag(BasePointers.size() - 1);
8581     for (auto &M : CurTypes)
8582       setCorrectMemberOfFlag(M, MemberOfFlag);
8583   }
8584 
  /// Generate all the base pointers, section pointers, sizes and map
  /// types for the extracted mappable expressions. Also, for each item that
  /// relates with a device pointer, a pair of the relevant declaration and
  /// index where it occurs is appended to the device pointers info array.
  ///
  /// \param BasePointers [out] base addresses of each mapped entry (with the
  ///        device-pointer declaration attached where relevant).
  /// \param Pointers     [out] begin addresses of the mapped sections.
  /// \param Sizes        [out] sizes (as Int64 values) of the mapped sections.
  /// \param Types        [out] per-entry OMP_MAP_* flag combinations.
  void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
                       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                       MapFlagsArrayTy &Types) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map. A MapVector keeps
    // insertion order, so output stays deterministic.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses. Lists are keyed by the canonical declaration; a null key is
    // used for 'this'-based member expressions.
    auto &&InfoGen = [&Info](
        const ValueDecl *D,
        OMPClauseMappableExprCommon::MappableExprComponentListRef L,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit) {
      const ValueDecl *VD =
          D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
      Info[VD].emplace_back(L, MapType, MapModifiers, IsImplicit);
    };

    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    // Collect component lists from 'map', 'to' and 'from' clauses. 'to'/'from'
    // (of 'target update') are modeled with the corresponding map types.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }
    for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }
    for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;

    for (const auto *C :
         CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
      for (const auto L : C->component_lists()) {
        assert(!L.second.empty() && "Not expecting empty list of components!");
        const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = L.second.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto CI = std::find_if(
              It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
                return MI.Components.back().getAssociatedDeclaration() == VD;
              });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          if (CI != It->second.end()) {
            CI->ReturnDevicePointer = true;
            continue;
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
                  /*ReturnDevicePointer=*/false, C->isImplicit());
          DeferredInfo[nullptr].emplace_back(IE, VD);
        } else {
          // Not a struct member: emit the entry right away with zero size and
          // the RETURN_PARAM flag so the runtime hands back the device address.
          llvm::Value *Ptr =
              CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
          BasePointers.emplace_back(Ptr, VD);
          Pointers.push_back(Ptr);
          Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
          Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
        }
      }
    }

    for (const auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = true;

      // Temporary versions of arrays, merged into the output arrays only after
      // the whole declaration (including a possible combined struct entry) has
      // been processed.
      MapBaseValuesArrayTy CurBasePointers;
      MapValuesArrayTy CurPointers;
      MapValuesArrayTy CurSizes;
      MapFlagsArrayTy CurTypes;
      StructRangeInfoTy PartialStruct;

      for (const MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");

        // Remember the current base pointer index.
        unsigned CurrentBasePointersIdx = CurBasePointers.size();
        generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
                                     CurBasePointers, CurPointers, CurSizes,
                                     CurTypes, PartialStruct,
                                     IsFirstComponentList, L.IsImplicit);

        // If this entry relates with a device pointer, set the relevant
        // declaration and add the 'return pointer' flag.
        if (L.ReturnDevicePointer) {
          assert(CurBasePointers.size() > CurrentBasePointersIdx &&
                 "Unexpected number of mapped base pointers.");

          const ValueDecl *RelevantVD =
              L.Components.back().getAssociatedDeclaration();
          assert(RelevantVD &&
                 "No relevant declaration related with device pointer??");

          CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
          CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
        }
        IsFirstComponentList = false;
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr.
      auto CI = DeferredInfo.find(M.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
          llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
              this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
          CurBasePointers.emplace_back(BasePtr, L.VD);
          CurPointers.push_back(Ptr);
          CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
          // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
          // value MEMBER_OF=FFFF so that the entry is later updated with the
          // correct value of MEMBER_OF.
          CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                             OMP_MAP_MEMBER_OF);
        }
      }

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
                          PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      Types.append(CurTypes.begin(), CurTypes.end());
    }
  }
8763 
8764   /// Generate all the base pointers, section pointers, sizes and map types for
8765   /// the extracted map clauses of user-defined mapper.
8766   void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers,
8767                                 MapValuesArrayTy &Pointers,
8768                                 MapValuesArrayTy &Sizes,
8769                                 MapFlagsArrayTy &Types) const {
8770     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8771            "Expect a declare mapper directive");
8772     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8773     // We have to process the component lists that relate with the same
8774     // declaration in a single chunk so that we can generate the map flags
8775     // correctly. Therefore, we organize all lists in a map.
8776     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
8777 
8778     // Helper function to fill the information map for the different supported
8779     // clauses.
8780     auto &&InfoGen = [&Info](
8781         const ValueDecl *D,
8782         OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8783         OpenMPMapClauseKind MapType,
8784         ArrayRef<OpenMPMapModifierKind> MapModifiers,
8785         bool ReturnDevicePointer, bool IsImplicit) {
8786       const ValueDecl *VD =
8787           D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
8788       Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
8789                             IsImplicit);
8790     };
8791 
8792     for (const auto *C : CurMapperDir->clauselists()) {
8793       const auto *MC = cast<OMPMapClause>(C);
8794       for (const auto L : MC->component_lists()) {
8795         InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(),
8796                 /*ReturnDevicePointer=*/false, MC->isImplicit());
8797       }
8798     }
8799 
8800     for (const auto &M : Info) {
8801       // We need to know when we generate information for the first component
8802       // associated with a capture, because the mapping flags depend on it.
8803       bool IsFirstComponentList = true;
8804 
8805       // Temporary versions of arrays
8806       MapBaseValuesArrayTy CurBasePointers;
8807       MapValuesArrayTy CurPointers;
8808       MapValuesArrayTy CurSizes;
8809       MapFlagsArrayTy CurTypes;
8810       StructRangeInfoTy PartialStruct;
8811 
8812       for (const MapInfo &L : M.second) {
8813         assert(!L.Components.empty() &&
8814                "Not expecting declaration with no component lists.");
8815         generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
8816                                      CurBasePointers, CurPointers, CurSizes,
8817                                      CurTypes, PartialStruct,
8818                                      IsFirstComponentList, L.IsImplicit);
8819         IsFirstComponentList = false;
8820       }
8821 
8822       // If there is an entry in PartialStruct it means we have a struct with
8823       // individual members mapped. Emit an extra combined entry.
8824       if (PartialStruct.Base.isValid())
8825         emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8826                           PartialStruct);
8827 
8828       // We need to append the results of this capture to what we already have.
8829       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8830       Pointers.append(CurPointers.begin(), CurPointers.end());
8831       Sizes.append(CurSizes.begin(), CurSizes.end());
8832       Types.append(CurTypes.begin(), CurTypes.end());
8833     }
8834   }
8835 
  /// Emit capture info for lambdas for variables captured by reference.
  ///
  /// For a captured variable \p VD whose type is a lambda, emits one map entry
  /// per by-reference (or pointer-typed) capture, plus one for the captured
  /// 'this' if present. Each emitted entry carries the flag combination
  /// PTR_AND_OBJ | LITERAL | MEMBER_OF | IMPLICIT; the MEMBER_OF field is a
  /// placeholder fixed up later by adjustMemberOfForLambdaCaptures, which
  /// relies on \p LambdaPointers mapping each capture's field address to the
  /// lambda object's address.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
      MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
      MapFlagsArrayTy &Types,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    // Only lambdas need this treatment; anything else is handled elsewhere.
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      // Map the captured 'this' pointer: base is the field inside the lambda,
      // pointee is the object it points at.
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      BasePointers.push_back(ThisLVal.getPointer(CGF));
      Pointers.push_back(ThisLValVal.getPointer(CGF));
      Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      const VarDecl *VD = LC.getCapturedVar();
      // Only by-reference captures and pointer-typed by-copy captures need a
      // map entry; plain by-copy captures travel with the lambda object.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        // By-reference capture: map the referenced object with its full size.
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        BasePointers.push_back(VarLVal.getPointer(CGF));
        Pointers.push_back(VarLValVal.getPointer(CGF));
        Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointer captured by copy: map the pointer value with zero size.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        BasePointers.push_back(VarLVal.getPointer(CGF));
        Pointers.push_back(VarRVal.getScalarVal());
        Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    }
  }
8899 
8900   /// Set correct indices for lambdas captures.
8901   void adjustMemberOfForLambdaCaptures(
8902       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8903       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8904       MapFlagsArrayTy &Types) const {
8905     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8906       // Set correct member_of idx for all implicit lambda captures.
8907       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8908                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8909         continue;
8910       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8911       assert(BasePtr && "Unable to find base lambda address.");
8912       int TgtIdx = -1;
8913       for (unsigned J = I; J > 0; --J) {
8914         unsigned Idx = J - 1;
8915         if (Pointers[Idx] != BasePtr)
8916           continue;
8917         TgtIdx = Idx;
8918         break;
8919       }
8920       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8921       // All other current entries will be MEMBER_OF the combined entry
8922       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8923       // 0xFFFF in the MEMBER_OF field).
8924       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8925       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8926     }
8927   }
8928 
  /// Generate the base pointers, section pointers, sizes and map types
  /// associated to a given capture.
  ///
  /// Entries for component lists that overlap (one is a prefix of another,
  /// e.g. a struct and one of its members) are emitted with knowledge of the
  /// overlapped sub-lists so the ranges can be split correctly.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg,
                              MapBaseValuesArrayTy &BasePointers,
                              MapValuesArrayTy &Pointers,
                              MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // 'this' captures are keyed with a null declaration.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      BasePointers.emplace_back(Arg, VD);
      Pointers.push_back(Arg);
      Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
      return;
    }

    // Gather every map-clause component list that targets this declaration.
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      for (const auto L : C->decl_component_lists(VD)) {
        assert(L.first == VD &&
               "We got information for the wrong declaration??");
        assert(!L.second.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(L.second, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit());
      }
    }

    // Find overlapping elements (including the offset from the base element).
    // Two lists overlap when, comparing from the back (outermost component),
    // one list is exhausted before a mismatch is found - i.e. one is a prefix
    // of the other.
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ++Count;
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        // NOTE(review): this tie reuses MapType/MapModifiers/IsImplicit from
        // the outer loop, clobbering L's values. They are not read again in
        // this iteration (re-tied at the top of the outer loop), so this is
        // harmless - but fragile if code is added below that reads them.
        std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head of
        // the components list.
        if (CI == CE || SI == SE) {
          assert((CI != CE || SI != SE) &&
                 "Unexpected full match of the mapping components.");
          // The shorter (exhausted) list is the base; the longer one is the
          // overlapped sub-range recorded against it.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // Flatten the record layout so fields in different (base) classes can be
      // ordered by their position in memory.
      if (const auto *CRD =
              VD->getType().getCanonicalType()->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            // Compare from the back (outermost component) until the lists
            // diverge, then order by declaration position.
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // Diverging fields: the one declared earlier in the same record
            // compares less; across records, use the flattened layout order.
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // First emit entries that have overlapped component lists, passing the
    // sorted overlap information along so the ranges can be split.
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      bool IsFirstComponentList = true;
      generateInfoForComponentList(MapType, MapModifiers, Components,
                                   BasePointers, Pointers, Sizes, Types,
                                   PartialStruct, IsFirstComponentList,
                                   IsImplicit, OverlappedComponents);
    }
    // Go through other elements without overlapped elements.
    bool IsFirstComponentList = OverlappedData.empty();
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, Components,
                                     BasePointers, Pointers, Sizes, Types,
                                     PartialStruct, IsFirstComponentList,
                                     IsImplicit);
      IsFirstComponentList = false;
    }
  }
9106 
9107   /// Generate the base pointers, section pointers, sizes and map types
9108   /// associated with the declare target link variables.
9109   void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
9110                                         MapValuesArrayTy &Pointers,
9111                                         MapValuesArrayTy &Sizes,
9112                                         MapFlagsArrayTy &Types) const {
9113     assert(CurDir.is<const OMPExecutableDirective *>() &&
9114            "Expect a executable directive");
9115     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9116     // Map other list items in the map clause which are not captured variables
9117     // but "declare target link" global variables.
9118     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
9119       for (const auto L : C->component_lists()) {
9120         if (!L.first)
9121           continue;
9122         const auto *VD = dyn_cast<VarDecl>(L.first);
9123         if (!VD)
9124           continue;
9125         llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9126             OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9127         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
9128             !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
9129           continue;
9130         StructRangeInfoTy PartialStruct;
9131         generateInfoForComponentList(
9132             C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
9133             Pointers, Sizes, Types, PartialStruct,
9134             /*IsFirstComponentList=*/true, C->isImplicit());
9135         assert(!PartialStruct.Base.isValid() &&
9136                "No partial structs for declare target link expected.");
9137       }
9138     }
9139   }
9140 
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  ///
  /// Used when no explicit map clause covers the capture. Exactly one entry is
  /// appended to each of the Cur* arrays; it is always marked TARGET_PARAM and,
  /// unless a firstprivate clause says otherwise, IMPLICIT.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapBaseValuesArrayTy &CurBasePointers,
                              MapValuesArrayTy &CurPointers,
                              MapValuesArrayTy &CurSizes,
                              MapFlagsArrayTy &CurMapTypes) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // 'this': map the pointee object tofrom with its full size.
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CurSizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CurMapTypes.push_back(OMP_MAP_LITERAL);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CurMapTypes.push_back(OMP_MAP_NONE);
        CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      // A firstprivate clause may override whether this map is implicit.
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CurSizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end() &&
          VD->getType().isConstant(CGF.getContext())) {
        // Constant firstprivate: materialize a global copy once and map that
        // instead of the original variable.
        llvm::Constant *Addr =
            CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
        // Copy the value of the original variable to the new global copy.
        CGF.Builder.CreateMemCpy(
            CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
            Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
            CurSizes.back(), /*IsVolatile=*/false);
        // Use new global variable as the base pointers.
        CurBasePointers.push_back(Addr);
        CurPointers.push_back(Addr);
      } else {
        CurBasePointers.push_back(CV);
        if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
          // Firstprivate pointer passed by reference: the mapped pointer is
          // the value loaded through the reference, not the reference itself.
          Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
              CV, ElementType, CGF.getContext().getDeclAlign(VD),
              AlignmentSource::Decl));
          CurPointers.push_back(PtrAddr.getPointer());
        } else {
          CurPointers.push_back(CV);
        }
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CurMapTypes.back() |= OMP_MAP_IMPLICIT;
  }
9224 };
9225 } // anonymous namespace
9226 
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
///
/// Four arrays are produced and recorded in \p Info: base pointers, section
/// begin pointers, sizes (constant global when all sizes are compile-time
/// constants, otherwise a stack array filled at runtime), and map-type flags
/// (always a constant global).
static void
emitOffloadingArrays(CodeGenFunction &CGF,
                     MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
                     MappableExprsHandler::MapValuesArrayTy &Pointers,
                     MappableExprsHandler::MapValuesArrayTy &Sizes,
                     MappableExprsHandler::MapFlagsArrayTy &MapTypes,
                     CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    // Build the type 'void *[NumberOfPtrs]' used for both the base-pointer
    // and the pointer arrays.
    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      // Runtime-evaluated sizes: allocate a stack array to be filled in the
      // per-capture loop below.
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (llvm::Value *S : Sizes)
        ConstSizes.push_back(cast<llvm::Constant>(S));

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
    llvm::copy(MapTypes, Mapping.begin());
    llvm::Constant *MapTypesArrayInit =
        llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
        CGM.getModule(), MapTypesArrayInit->getType(),
        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
        MapTypesArrayInit, MaptypesName);
    MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
    Info.MapTypesArray = MapTypesArrayGbl;

    // Fill the base-pointer and pointer arrays (and the sizes array if it is
    // runtime-evaluated) one capture at a time.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      // Cast the slot to the stored value's pointer type before the store.
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Remember where this declaration's base pointer was stored so device
      // pointer users (e.g. use_device_ptr) can find it later.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        // Sizes may have a narrower type; widen to i64 as the runtime expects.
        CGF.Builder.CreateStore(
            CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true),
            SAddr);
      }
    }
  }
}
9345 
9346 /// Emit the arguments to be passed to the runtime library based on the
9347 /// arrays of pointers, sizes and map types.
9348 static void emitOffloadingArraysArgument(
9349     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
9350     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
9351     llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
9352   CodeGenModule &CGM = CGF.CGM;
9353   if (Info.NumberOfPtrs) {
9354     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9355         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9356         Info.BasePointersArray,
9357         /*Idx0=*/0, /*Idx1=*/0);
9358     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9359         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9360         Info.PointersArray,
9361         /*Idx0=*/0,
9362         /*Idx1=*/0);
9363     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9364         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
9365         /*Idx0=*/0, /*Idx1=*/0);
9366     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9367         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9368         Info.MapTypesArray,
9369         /*Idx0=*/0,
9370         /*Idx1=*/0);
9371   } else {
9372     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9373     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9374     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9375     MapTypesArrayArg =
9376         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9377   }
9378 }
9379 
9380 /// Check for inner distribute directive.
9381 static const OMPExecutableDirective *
9382 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
9383   const auto *CS = D.getInnermostCapturedStmt();
9384   const auto *Body =
9385       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
9386   const Stmt *ChildStmt =
9387       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9388 
9389   if (const auto *NestedDir =
9390           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9391     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
9392     switch (D.getDirectiveKind()) {
9393     case OMPD_target:
9394       if (isOpenMPDistributeDirective(DKind))
9395         return NestedDir;
9396       if (DKind == OMPD_teams) {
9397         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
9398             /*IgnoreCaptured=*/true);
9399         if (!Body)
9400           return nullptr;
9401         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9402         if (const auto *NND =
9403                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9404           DKind = NND->getDirectiveKind();
9405           if (isOpenMPDistributeDirective(DKind))
9406             return NND;
9407         }
9408       }
9409       return nullptr;
9410     case OMPD_target_teams:
9411       if (isOpenMPDistributeDirective(DKind))
9412         return NestedDir;
9413       return nullptr;
9414     case OMPD_target_parallel:
9415     case OMPD_target_simd:
9416     case OMPD_target_parallel_for:
9417     case OMPD_target_parallel_for_simd:
9418       return nullptr;
9419     case OMPD_target_teams_distribute:
9420     case OMPD_target_teams_distribute_simd:
9421     case OMPD_target_teams_distribute_parallel_for:
9422     case OMPD_target_teams_distribute_parallel_for_simd:
9423     case OMPD_parallel:
9424     case OMPD_for:
9425     case OMPD_parallel_for:
9426     case OMPD_parallel_master:
9427     case OMPD_parallel_sections:
9428     case OMPD_for_simd:
9429     case OMPD_parallel_for_simd:
9430     case OMPD_cancel:
9431     case OMPD_cancellation_point:
9432     case OMPD_ordered:
9433     case OMPD_threadprivate:
9434     case OMPD_allocate:
9435     case OMPD_task:
9436     case OMPD_simd:
9437     case OMPD_sections:
9438     case OMPD_section:
9439     case OMPD_single:
9440     case OMPD_master:
9441     case OMPD_critical:
9442     case OMPD_taskyield:
9443     case OMPD_barrier:
9444     case OMPD_taskwait:
9445     case OMPD_taskgroup:
9446     case OMPD_atomic:
9447     case OMPD_flush:
9448     case OMPD_depobj:
9449     case OMPD_scan:
9450     case OMPD_teams:
9451     case OMPD_target_data:
9452     case OMPD_target_exit_data:
9453     case OMPD_target_enter_data:
9454     case OMPD_distribute:
9455     case OMPD_distribute_simd:
9456     case OMPD_distribute_parallel_for:
9457     case OMPD_distribute_parallel_for_simd:
9458     case OMPD_teams_distribute:
9459     case OMPD_teams_distribute_simd:
9460     case OMPD_teams_distribute_parallel_for:
9461     case OMPD_teams_distribute_parallel_for_simd:
9462     case OMPD_target_update:
9463     case OMPD_declare_simd:
9464     case OMPD_declare_variant:
9465     case OMPD_begin_declare_variant:
9466     case OMPD_end_declare_variant:
9467     case OMPD_declare_target:
9468     case OMPD_end_declare_target:
9469     case OMPD_declare_reduction:
9470     case OMPD_declare_mapper:
9471     case OMPD_taskloop:
9472     case OMPD_taskloop_simd:
9473     case OMPD_master_taskloop:
9474     case OMPD_master_taskloop_simd:
9475     case OMPD_parallel_master_taskloop:
9476     case OMPD_parallel_master_taskloop_simd:
9477     case OMPD_requires:
9478     case OMPD_unknown:
9479       llvm_unreachable("Unexpected directive.");
9480     }
9481   }
9482 
9483   return nullptr;
9484 }
9485 
/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type) {
///   // Allocate space for an array section first.
///   if (size > 1 && !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFrom(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFrom(type));
/// }
/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // The mapper function was already emitted for this declaration.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // The function name embeds the mangled mapped type and the mapper's id so
  // distinct mappers get distinct symbols.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // Allow the mapper body to be optimized.
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
      CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  // PHI over the current element pointer; the back-edge incoming value is
  // added after the loop body is emitted (see PtrNext below).
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
    return MapperCGF
        .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
        .getAddress(MapperCGF);
  });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
  MappableExprsHandler::MapValuesArrayTy Pointers;
  MappableExprsHandler::MapValuesArrayTy Sizes;
  MappableExprsHandler::MapFlagsArrayTy MapTypes;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__tgt_mapper_num_components), OffloadingArgs);
  // Shift the count into the MEMBER_OF bit-field position of the map type.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Sizes[I];

    // Extract the MEMBER_OF field from the map type.
    llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
    MapperCGF.EmitBlock(MemberBB);
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]);
    llvm::Value *Member = MapperCGF.Builder.CreateAnd(
        OriMapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
    llvm::BasicBlock *MemberCombineBB =
        MapperCGF.createBasicBlock("omp.member.combine");
    llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
    llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
    MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
    // Add the number of pre-existing components to the MEMBER_OF field if it
    // is valid.
    MapperCGF.EmitBlock(MemberCombineBB);
    llvm::Value *CombinedMember =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
    // Do nothing if it is not a member of previous components.
    MapperCGF.EmitBlock(TypeBB);
    llvm::PHINode *MemberMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
    MemberMapType->addIncoming(OriMapType, MemberBB);
    MemberMapType->addIncoming(CombinedMember, MemberCombineBB);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    // Merge the four decayed map types; the tofrom case arrives via the
    // ToElseBB -> EndBB edge with MemberMapType unchanged.
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    // TODO: call the corresponding mapper function if a user-defined mapper is
    // associated with this map clause.
    // Call the runtime API __tgt_push_mapper_component to fill up the runtime
    // data structure.
    llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType};
    MapperCGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__tgt_push_mapper_component),
        OffloadingArgs);
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, BodyBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Cache the function and, when called from within a function's codegen,
  // remember the association so it can be cleaned up with that function.
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
9763 
/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
///
/// \param Handle The runtime mapper handle passed through to the runtime call.
/// \param Base/Begin The base and begin pointers of the mapped section.
/// \param Size Number of elements in the section.
/// \param MapType The incoming map-type flags; TO/FROM bits are cleared so the
///        emitted runtime call performs allocation/deletion only.
/// \param ExitBB Block to branch to when this portion does not apply.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section (i.e. Size >= 1).
  llvm::BasicBlock *IsDeleteBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"}));
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);

  // Evaluate if we are going to delete this section. The init portion runs
  // only when the delete bit is clear; the delete portion only when it is set.
  MapperCGF.EmitBlock(IsDeleteBB);
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  if (IsInit) {
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
  MapperCGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__tgt_push_mapper_component), OffloadingArgs);
}
9817 
9818 void CGOpenMPRuntime::emitTargetNumIterationsCall(
9819     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9820     llvm::Value *DeviceID,
9821     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9822                                      const OMPLoopDirective &D)>
9823         SizeEmitter) {
9824   OpenMPDirectiveKind Kind = D.getDirectiveKind();
9825   const OMPExecutableDirective *TD = &D;
9826   // Get nested teams distribute kind directive, if any.
9827   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
9828     TD = getNestedDistributeDirective(CGM.getContext(), D);
9829   if (!TD)
9830     return;
9831   const auto *LD = cast<OMPLoopDirective>(TD);
9832   auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF,
9833                                                      PrePostActionTy &) {
9834     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
9835       llvm::Value *Args[] = {DeviceID, NumIterations};
9836       CGF.EmitRuntimeCall(
9837           createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
9838     }
9839   };
9840   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
9841 }
9842 
/// Emit the host-side code that launches a target region: capture the region's
/// variables, fill the offloading argument arrays, call the appropriate
/// __tgt_target*() runtime entry, and fall back to the host-outlined function
/// when the offload call fails, when no device binary exists (no OutlinedFnID),
/// or when the if-clause condition selects host execution.
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // A depend clause requires the region to be emitted through a target task
  // (the captured variables must then be re-captured inside the task body).
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  // Materialize the captured variables once up front; task-based emission
  // clears and re-captures them later where needed.
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // InputInfo and MapTypesArray are filled in by TargetThenGen (below) before
  // ThenGen runs; they are captured by reference for that reason.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      if (RequiresOuterTask) {
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No device clause: let the runtime pick the default device.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
                                          : OMPRTL__tgt_target_teams),
          OffloadingArgs);
    } else {
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
                                          : OMPRTL__tgt_target),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      // Re-capture inside the task region before the host fallback call.
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Builds the offloading arrays (base pointers, pointers, sizes, map types)
  // for every capture, then runs ThenGen either inline or inside a target
  // task when a depend clause is present.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;

    // RI and CV advance in lock-step with the capture iterator: one record
    // field and one captured value per capture.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
      MappableExprsHandler::MapValuesArrayTy CurPointers;
      MappableExprsHandler::MapValuesArrayTy CurSizes;
      MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurBasePointers.push_back(*CV);
        CurPointers.push_back(*CV);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                              MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                              MappableExprsHandler::OMP_MAP_IMPLICIT);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
                                         CurSizes, CurMapTypes, PartialStruct);
        if (CurBasePointers.empty())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
                                           CurPointers, CurSizes, CurMapTypes);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(
              CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
              CurMapTypes, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert(!CurBasePointers.empty() &&
             "Non-existing map pointer for capture!");
      assert(CurBasePointers.size() == CurPointers.size() &&
             CurBasePointers.size() == CurSizes.size() &&
             CurBasePointers.size() == CurMapTypes.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
                                    CurMapTypes, PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
                                              Pointers, MapTypes);
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
                                               MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Publish the arrays to the InputInfo/MapTypesArray locations that ThenGen
    // (captured by reference above) reads when it emits the runtime call.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // Host-side emission of the else branch (no offload arrays needed).
  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
10134 
/// Recursively walk the statement tree rooted at \p S looking for OpenMP
/// target execution directives and emit the corresponding device function for
/// each one found. \p ParentName is the mangled name of the enclosing function
/// and is used to form the unique offload entry for the region.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the device-function emitter for the specific combined
    // target directive kind.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // All remaining directive kinds are not target execution directives and
    // cannot reach this switch (guarded by RequiresDeviceCodegen above). They
    // are spelled out so that adding a new kind forces a -Wswitch update here.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // Non-target OpenMP directives: recurse into the associated statement only.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(
        E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
10282 
10283 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10284   // If emitting code for the host, we do not process FD here. Instead we do
10285   // the normal code generation.
10286   if (!CGM.getLangOpts().OpenMPIsDevice) {
10287     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
10288       Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10289           OMPDeclareTargetDeclAttr::getDeviceType(FD);
10290       // Do not emit device_type(nohost) functions for the host.
10291       if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10292         return true;
10293     }
10294     return false;
10295   }
10296 
10297   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10298   // Try to detect target regions in the function.
10299   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10300     StringRef Name = CGM.getMangledName(GD);
10301     scanForTargetRegionsFunctions(FD->getBody(), Name);
10302     Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10303         OMPDeclareTargetDeclAttr::getDeviceType(FD);
10304     // Do not emit device_type(nohost) functions for the host.
10305     if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10306       return true;
10307   }
10308 
10309   // Do not to emit function if it is not marked as declare target.
10310   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10311          AlreadyEmittedTargetDecls.count(VD) == 0;
10312 }
10313 
10314 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10315   if (!CGM.getLangOpts().OpenMPIsDevice)
10316     return false;
10317 
10318   // Check if there are Ctors/Dtors in this declaration and look for target
10319   // regions in it. We use the complete variant to produce the kernel name
10320   // mangling.
10321   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10322   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10323     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10324       StringRef ParentName =
10325           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10326       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10327     }
10328     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10329       StringRef ParentName =
10330           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10331       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10332     }
10333   }
10334 
10335   // Do not to emit variable if it is not marked as declare target.
10336   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10337       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10338           cast<VarDecl>(GD.getDecl()));
10339   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10340       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10341        HasRequiresUnifiedSharedMemory)) {
10342     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10343     return true;
10344   }
10345   return false;
10346 }
10347 
10348 llvm::Constant *
10349 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
10350                                                 const VarDecl *VD) {
10351   assert(VD->getType().isConstant(CGM.getContext()) &&
10352          "Expected constant variable.");
10353   StringRef VarName;
10354   llvm::Constant *Addr;
10355   llvm::GlobalValue::LinkageTypes Linkage;
10356   QualType Ty = VD->getType();
10357   SmallString<128> Buffer;
10358   {
10359     unsigned DeviceID;
10360     unsigned FileID;
10361     unsigned Line;
10362     getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
10363                              FileID, Line);
10364     llvm::raw_svector_ostream OS(Buffer);
10365     OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
10366        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
10367     VarName = OS.str();
10368   }
10369   Linkage = llvm::GlobalValue::InternalLinkage;
10370   Addr =
10371       getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
10372                                   getDefaultFirstprivateAddressSpace());
10373   cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
10374   CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
10375   CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
10376   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10377       VarName, Addr, VarSize,
10378       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
10379   return Addr;
10380 }
10381 
10382 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10383                                                    llvm::Constant *Addr) {
10384   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10385       !CGM.getLangOpts().OpenMPIsDevice)
10386     return;
10387   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10388       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10389   if (!Res) {
10390     if (CGM.getLangOpts().OpenMPIsDevice) {
10391       // Register non-target variables being emitted in device code (debug info
10392       // may cause this).
10393       StringRef VarName = CGM.getMangledName(VD);
10394       EmittedNonTargetVariables.try_emplace(VarName, Addr);
10395     }
10396     return;
10397   }
10398   // Register declare target variables.
10399   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
10400   StringRef VarName;
10401   CharUnits VarSize;
10402   llvm::GlobalValue::LinkageTypes Linkage;
10403 
10404   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10405       !HasRequiresUnifiedSharedMemory) {
10406     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10407     VarName = CGM.getMangledName(VD);
10408     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
10409       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
10410       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
10411     } else {
10412       VarSize = CharUnits::Zero();
10413     }
10414     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
10415     // Temp solution to prevent optimizations of the internal variables.
10416     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
10417       std::string RefName = getName({VarName, "ref"});
10418       if (!CGM.GetGlobalValue(RefName)) {
10419         llvm::Constant *AddrRef =
10420             getOrCreateInternalVariable(Addr->getType(), RefName);
10421         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
10422         GVAddrRef->setConstant(/*Val=*/true);
10423         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
10424         GVAddrRef->setInitializer(Addr);
10425         CGM.addCompilerUsedGlobal(GVAddrRef);
10426       }
10427     }
10428   } else {
10429     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
10430             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10431              HasRequiresUnifiedSharedMemory)) &&
10432            "Declare target attribute must link or to with unified memory.");
10433     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
10434       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
10435     else
10436       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10437 
10438     if (CGM.getLangOpts().OpenMPIsDevice) {
10439       VarName = Addr->getName();
10440       Addr = nullptr;
10441     } else {
10442       VarName = getAddrOfDeclareTargetVar(VD).getName();
10443       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
10444     }
10445     VarSize = CGM.getPointerSize();
10446     Linkage = llvm::GlobalValue::WeakAnyLinkage;
10447   }
10448 
10449   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10450       VarName, Addr, VarSize, Flags, Linkage);
10451 }
10452 
10453 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10454   if (isa<FunctionDecl>(GD.getDecl()) ||
10455       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10456     return emitTargetFunctions(GD);
10457 
10458   return emitTargetGlobalVariable(GD);
10459 }
10460 
10461 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10462   for (const VarDecl *VD : DeferredGlobalVariables) {
10463     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10464         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10465     if (!Res)
10466       continue;
10467     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10468         !HasRequiresUnifiedSharedMemory) {
10469       CGM.EmitGlobal(VD);
10470     } else {
10471       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10472               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10473                HasRequiresUnifiedSharedMemory)) &&
10474              "Expected link clause or to clause with unified memory.");
10475       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10476     }
10477   }
10478 }
10479 
10480 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10481     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10482   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10483          " Expected target-based directive.");
10484 }
10485 
10486 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10487   for (const OMPClause *Clause : D->clauselists()) {
10488     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10489       HasRequiresUnifiedSharedMemory = true;
10490     } else if (const auto *AC =
10491                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10492       switch (AC->getAtomicDefaultMemOrderKind()) {
10493       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10494         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10495         break;
10496       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10497         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10498         break;
10499       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10500         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10501         break;
10502       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10503         break;
10504       }
10505     }
10506   }
10507 }
10508 
/// Returns the default atomic ordering selected by a 'requires
/// atomic_default_mem_order' clause (see processRequiresDirective()).
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
10512 
10513 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10514                                                        LangAS &AS) {
10515   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10516     return false;
10517   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10518   switch(A->getAllocatorType()) {
10519   case OMPAllocateDeclAttr::OMPNullMemAlloc:
10520   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10521   // Not supported, fallback to the default mem space.
10522   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10523   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10524   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10525   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10526   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10527   case OMPAllocateDeclAttr::OMPConstMemAlloc:
10528   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10529     AS = LangAS::Default;
10530     return true;
10531   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10532     llvm_unreachable("Expected predefined allocator for the variables with the "
10533                      "static storage.");
10534   }
10535   return false;
10536 }
10537 
/// Returns whether a '#pragma omp requires unified_shared_memory' clause was
/// seen in this translation unit (flag is set by the clause scan above).
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
10541 
10542 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10543     CodeGenModule &CGM)
10544     : CGM(CGM) {
10545   if (CGM.getLangOpts().OpenMPIsDevice) {
10546     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10547     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10548   }
10549 }
10550 
10551 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10552   if (CGM.getLangOpts().OpenMPIsDevice)
10553     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10554 }
10555 
10556 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10557   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
10558     return true;
10559 
10560   const auto *D = cast<FunctionDecl>(GD.getDecl());
10561   // Do not to emit function if it is marked as declare target as it was already
10562   // emitted.
10563   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10564     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10565       if (auto *F = dyn_cast_or_null<llvm::Function>(
10566               CGM.GetGlobalValue(CGM.getMangledName(GD))))
10567         return !F->isDeclaration();
10568       return false;
10569     }
10570     return true;
10571   }
10572 
10573   return !AlreadyEmittedTargetDecls.insert(D).second;
10574 }
10575 
/// Emits (and returns) a global-init-style function that informs the offload
/// runtime of the 'requires' clauses seen in this TU by calling
/// __tgt_register_requires(flags). Returns nullptr when no registration is
/// needed (no target triples, simd-only mode, device compilation, or no
/// target/declare-target code emitted).
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    // Function is named "<prefix>omp_offloading_requires_reg".
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    // Flags are passed as a single i64 bitmask to the runtime.
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires),
        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}
10616 
10617 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10618                                     const OMPExecutableDirective &D,
10619                                     SourceLocation Loc,
10620                                     llvm::Function *OutlinedFn,
10621                                     ArrayRef<llvm::Value *> CapturedVars) {
10622   if (!CGF.HaveInsertPoint())
10623     return;
10624 
10625   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10626   CodeGenFunction::RunCleanupsScope Scope(CGF);
10627 
10628   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10629   llvm::Value *Args[] = {
10630       RTLoc,
10631       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10632       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10633   llvm::SmallVector<llvm::Value *, 16> RealArgs;
10634   RealArgs.append(std::begin(Args), std::end(Args));
10635   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10636 
10637   llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
10638   CGF.EmitRuntimeCall(RTLFn, RealArgs);
10639 }
10640 
10641 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10642                                          const Expr *NumTeams,
10643                                          const Expr *ThreadLimit,
10644                                          SourceLocation Loc) {
10645   if (!CGF.HaveInsertPoint())
10646     return;
10647 
10648   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10649 
10650   llvm::Value *NumTeamsVal =
10651       NumTeams
10652           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10653                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10654           : CGF.Builder.getInt32(0);
10655 
10656   llvm::Value *ThreadLimitVal =
10657       ThreadLimit
10658           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10659                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10660           : CGF.Builder.getInt32(0);
10661 
10662   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
10663   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10664                                      ThreadLimitVal};
10665   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
10666                       PushNumTeamsArgs);
10667 }
10668 
/// Emits the runtime bracketing for a 'target data' region:
///   __tgt_target_data_begin(...); <body>; __tgt_target_data_end(...);
/// honoring an optional if-clause (\p IfCond) and device-clause (\p Device).
/// When device pointers are captured (Info.CaptureDeviceAddrMap non-empty),
/// the body is emitted twice: privatized inside the then-branch and
/// non-privatized in the else-branch / fallback path.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MCHandler(D, CGF);
    MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any; without a device clause pass the sentinel
    // OMP_DEVICEID_UNDEF.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin),
                        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    // Re-derive the array arguments from Info filled in by BeginThenGen.
    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end),
                        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  // Open the data environment (conditionally if an if-clause is present).
  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  // Close the data environment (again guarded by the if-clause if present).
  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
10795 
/// Emits a single runtime call for the standalone data-movement directives
/// 'target enter data', 'target exit data' and 'target update'. Selects the
/// matching _nowait runtime entry when a nowait clause is present, wraps the
/// emission in a target task when depend clauses are present, and honors
/// optional if/device clauses.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // InputInfo/MapTypesArray are filled by TargetThenGen below and read (by
  // reference) inside ThenGen when it finally runs.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo,
                    &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any; without a device clause pass the sentinel
    // OMP_DEVICEID_UNDEF.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    llvm::Value *OffloadingArgs[] = {DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray};

    // Select the right runtime function call for each expected standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    OpenMPRTLFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
                        : OMPRTL__tgt_target_data_begin;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
                        : OMPRTL__tgt_target_data_end;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
                        : OMPRTL__tgt_target_data_update;
      break;
    // Every other directive kind is ruled out by the assertion at the top of
    // this function; the exhaustive list keeps -Wswitch diagnostics useful.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
  };

  // Builds the offloading arrays from the map clauses and then emits ThenGen
  // either inlined or wrapped in a target task (when depend clauses exist).
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Publish the arrays to the captures of ThenGen.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (D.hasClausesOfKind<OMPDependClause>())
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // With an if-clause the runtime call is skipped entirely when the condition
  // is false (empty else-branch).
  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
10958 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    // Parameters default to 'vector' unless classified otherwise.
    ParamKindTy Kind = Vector;
    // For Linear: the step (mangled only when != 1). For LinearWithVarStride:
    // mangled after 's' — presumably the position of the stride parameter;
    // confirm against the callers that fill this in.
    llvm::APSInt StrideOrArg;
    // Alignment token; mangled as 'a<value>' only when non-zero. Presumably
    // from an 'aligned' clause — confirm against the callers.
    llvm::APSInt Alignment;
  };
} // namespace
10969 
10970 static unsigned evaluateCDTSize(const FunctionDecl *FD,
10971                                 ArrayRef<ParamAttrTy> ParamAttrs) {
10972   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10973   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10974   // of that clause. The VLEN value must be power of 2.
10975   // In other case the notion of the function`s "characteristic data type" (CDT)
10976   // is used to compute the vector length.
10977   // CDT is defined in the following order:
10978   //   a) For non-void function, the CDT is the return type.
10979   //   b) If the function has any non-uniform, non-linear parameters, then the
10980   //   CDT is the type of the first such parameter.
10981   //   c) If the CDT determined by a) or b) above is struct, union, or class
10982   //   type which is pass-by-value (except for the type that maps to the
10983   //   built-in complex data type), the characteristic data type is int.
10984   //   d) If none of the above three cases is applicable, the CDT is int.
10985   // The VLEN is then determined based on the CDT and the size of vector
10986   // register of that ISA for which current vector version is generated. The
10987   // VLEN is computed using the formula below:
10988   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
10989   // where vector register size specified in section 3.2.1 Registers and the
10990   // Stack Frame of original AMD64 ABI document.
10991   QualType RetType = FD->getReturnType();
10992   if (RetType.isNull())
10993     return 0;
10994   ASTContext &C = FD->getASTContext();
10995   QualType CDT;
10996   if (!RetType.isNull() && !RetType->isVoidType()) {
10997     CDT = RetType;
10998   } else {
10999     unsigned Offset = 0;
11000     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11001       if (ParamAttrs[Offset].Kind == Vector)
11002         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11003       ++Offset;
11004     }
11005     if (CDT.isNull()) {
11006       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11007         if (ParamAttrs[I + Offset].Kind == Vector) {
11008           CDT = FD->getParamDecl(I)->getType();
11009           break;
11010         }
11011       }
11012     }
11013   }
11014   if (CDT.isNull())
11015     CDT = C.IntTy;
11016   CDT = CDT->getCanonicalTypeUnqualified();
11017   if (CDT->isRecordType() || CDT->isUnionType())
11018     CDT = C.IntTy;
11019   return C.getTypeSize(CDT);
11020 }
11021 
11022 static void
11023 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11024                            const llvm::APSInt &VLENVal,
11025                            ArrayRef<ParamAttrTy> ParamAttrs,
11026                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
11027   struct ISADataTy {
11028     char ISA;
11029     unsigned VecRegSize;
11030   };
11031   ISADataTy ISAData[] = {
11032       {
11033           'b', 128
11034       }, // SSE
11035       {
11036           'c', 256
11037       }, // AVX
11038       {
11039           'd', 256
11040       }, // AVX2
11041       {
11042           'e', 512
11043       }, // AVX512
11044   };
11045   llvm::SmallVector<char, 2> Masked;
11046   switch (State) {
11047   case OMPDeclareSimdDeclAttr::BS_Undefined:
11048     Masked.push_back('N');
11049     Masked.push_back('M');
11050     break;
11051   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11052     Masked.push_back('N');
11053     break;
11054   case OMPDeclareSimdDeclAttr::BS_Inbranch:
11055     Masked.push_back('M');
11056     break;
11057   }
11058   for (char Mask : Masked) {
11059     for (const ISADataTy &Data : ISAData) {
11060       SmallString<256> Buffer;
11061       llvm::raw_svector_ostream Out(Buffer);
11062       Out << "_ZGV" << Data.ISA << Mask;
11063       if (!VLENVal) {
11064         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11065         assert(NumElts && "Non-zero simdlen/cdtsize expected");
11066         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11067       } else {
11068         Out << VLENVal;
11069       }
11070       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11071         switch (ParamAttr.Kind){
11072         case LinearWithVarStride:
11073           Out << 's' << ParamAttr.StrideOrArg;
11074           break;
11075         case Linear:
11076           Out << 'l';
11077           if (ParamAttr.StrideOrArg != 1)
11078             Out << ParamAttr.StrideOrArg;
11079           break;
11080         case Uniform:
11081           Out << 'u';
11082           break;
11083         case Vector:
11084           Out << 'v';
11085           break;
11086         }
11087         if (!!ParamAttr.Alignment)
11088           Out << 'a' << ParamAttr.Alignment;
11089       }
11090       Out << '_' << Fn->getName();
11091       Fn->addFnAttr(Out.str());
11092     }
11093   }
11094 }
11095 
// These are the functions needed to mangle the names of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11101 
11102 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11103 ///
11104 /// TODO: Need to implement the behavior for reference marked with a
11105 /// var or no linear modifiers (1.b in the section). For this, we
11106 /// need to extend ParamKindTy to support the linear modifiers.
11107 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11108   QT = QT.getCanonicalType();
11109 
11110   if (QT->isVoidType())
11111     return false;
11112 
11113   if (Kind == ParamKindTy::Uniform)
11114     return false;
11115 
11116   if (Kind == ParamKindTy::Linear)
11117     return false;
11118 
11119   // TODO: Handle linear references with modifiers
11120 
11121   if (Kind == ParamKindTy::LinearWithVarStride)
11122     return false;
11123 
11124   return true;
11125 }
11126 
11127 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11128 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11129   QT = QT.getCanonicalType();
11130   unsigned Size = C.getTypeSize(QT);
11131 
11132   // Only scalars and complex within 16 bytes wide set PVB to true.
11133   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11134     return false;
11135 
11136   if (QT->isFloatingType())
11137     return true;
11138 
11139   if (QT->isIntegerType())
11140     return true;
11141 
11142   if (QT->isPointerType())
11143     return true;
11144 
11145   // TODO: Add support for complex types (section 3.1.2, item 2).
11146 
11147   return false;
11148 }
11149 
11150 /// Computes the lane size (LS) of a return type or of an input parameter,
11151 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11152 /// TODO: Add support for references, section 3.2.1, item 1.
11153 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11154   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11155     QualType PTy = QT.getCanonicalType()->getPointeeType();
11156     if (getAArch64PBV(PTy, C))
11157       return C.getTypeSize(PTy);
11158   }
11159   if (getAArch64PBV(QT, C))
11160     return C.getTypeSize(QT);
11161 
11162   return C.getTypeSize(C.getUIntPtrType());
11163 }
11164 
11165 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11166 // signature of the scalar function, as defined in 3.2.2 of the
11167 // AAVFABI.
11168 static std::tuple<unsigned, unsigned, bool>
11169 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11170   QualType RetType = FD->getReturnType().getCanonicalType();
11171 
11172   ASTContext &C = FD->getASTContext();
11173 
11174   bool OutputBecomesInput = false;
11175 
11176   llvm::SmallVector<unsigned, 8> Sizes;
11177   if (!RetType->isVoidType()) {
11178     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11179     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11180       OutputBecomesInput = true;
11181   }
11182   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11183     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11184     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11185   }
11186 
11187   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11188   // The LS of a function parameter / return value can only be a power
11189   // of 2, starting from 8 bits, up to 128.
11190   assert(std::all_of(Sizes.begin(), Sizes.end(),
11191                      [](unsigned Size) {
11192                        return Size == 8 || Size == 16 || Size == 32 ||
11193                               Size == 64 || Size == 128;
11194                      }) &&
11195          "Invalid size");
11196 
11197   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11198                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
11199                          OutputBecomesInput);
11200 }
11201 
11202 /// Mangle the parameter part of the vector function name according to
11203 /// their OpenMP classification. The mangling function is defined in
11204 /// section 3.5 of the AAVFABI.
11205 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11206   SmallString<256> Buffer;
11207   llvm::raw_svector_ostream Out(Buffer);
11208   for (const auto &ParamAttr : ParamAttrs) {
11209     switch (ParamAttr.Kind) {
11210     case LinearWithVarStride:
11211       Out << "ls" << ParamAttr.StrideOrArg;
11212       break;
11213     case Linear:
11214       Out << 'l';
11215       // Don't print the step value if it is not present or if it is
11216       // equal to 1.
11217       if (ParamAttr.StrideOrArg != 1)
11218         Out << ParamAttr.StrideOrArg;
11219       break;
11220     case Uniform:
11221       Out << 'u';
11222       break;
11223     case Vector:
11224       Out << 'v';
11225       break;
11226     }
11227 
11228     if (!!ParamAttr.Alignment)
11229       Out << 'a' << ParamAttr.Alignment;
11230   }
11231 
11232   return std::string(Out.str());
11233 }
11234 
11235 // Function used to add the attribute. The parameter `VLEN` is
11236 // templated to allow the use of "x" when targeting scalable functions
11237 // for SVE.
11238 template <typename T>
11239 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11240                                  char ISA, StringRef ParSeq,
11241                                  StringRef MangledName, bool OutputBecomesInput,
11242                                  llvm::Function *Fn) {
11243   SmallString<256> Buffer;
11244   llvm::raw_svector_ostream Out(Buffer);
11245   Out << Prefix << ISA << LMask << VLEN;
11246   if (OutputBecomesInput)
11247     Out << "v";
11248   Out << ParSeq << "_" << MangledName;
11249   Fn->addFnAttr(Out.str());
11250 }
11251 
11252 // Helper function to generate the Advanced SIMD names depending on
11253 // the value of the NDS when simdlen is not present.
11254 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11255                                       StringRef Prefix, char ISA,
11256                                       StringRef ParSeq, StringRef MangledName,
11257                                       bool OutputBecomesInput,
11258                                       llvm::Function *Fn) {
11259   switch (NDS) {
11260   case 8:
11261     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11262                          OutputBecomesInput, Fn);
11263     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11264                          OutputBecomesInput, Fn);
11265     break;
11266   case 16:
11267     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11268                          OutputBecomesInput, Fn);
11269     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11270                          OutputBecomesInput, Fn);
11271     break;
11272   case 32:
11273     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11274                          OutputBecomesInput, Fn);
11275     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11276                          OutputBecomesInput, Fn);
11277     break;
11278   case 64:
11279   case 128:
11280     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11281                          OutputBecomesInput, Fn);
11282     break;
11283   default:
11284     llvm_unreachable("Scalar type is too wide.");
11285   }
11286 }
11287 
/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
/// Builds the "_ZGV..." mangled variant names and attaches them to \p Fn as
/// function attributes. \p ISA selects the instruction set ('n' for Advanced
/// SIMD, 's' for SVE), \p UserVLEN is the value of the 'simdlen' clause (0
/// when the clause is absent), and \p SLoc anchors the warnings emitted for
/// invalid 'simdlen' values. Section numbers below refer to the AAVFABI.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  // NDS/WDS: narrowest/widest data size in bits (per getNDSWDS naming —
  // WDS is used as a bit count in the SVE check below).
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits: total bit width in [128, 2048], multiple of 128.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause: "N" = unmasked, "M" = masked.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1: scalable ("x") vector length.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
11396 
11397 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11398                                               llvm::Function *Fn) {
11399   ASTContext &C = CGM.getContext();
11400   FD = FD->getMostRecentDecl();
11401   // Map params to their positions in function decl.
11402   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11403   if (isa<CXXMethodDecl>(FD))
11404     ParamPositions.try_emplace(FD, 0);
11405   unsigned ParamPos = ParamPositions.size();
11406   for (const ParmVarDecl *P : FD->parameters()) {
11407     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11408     ++ParamPos;
11409   }
11410   while (FD) {
11411     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11412       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11413       // Mark uniform parameters.
11414       for (const Expr *E : Attr->uniforms()) {
11415         E = E->IgnoreParenImpCasts();
11416         unsigned Pos;
11417         if (isa<CXXThisExpr>(E)) {
11418           Pos = ParamPositions[FD];
11419         } else {
11420           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11421                                 ->getCanonicalDecl();
11422           Pos = ParamPositions[PVD];
11423         }
11424         ParamAttrs[Pos].Kind = Uniform;
11425       }
11426       // Get alignment info.
11427       auto NI = Attr->alignments_begin();
11428       for (const Expr *E : Attr->aligneds()) {
11429         E = E->IgnoreParenImpCasts();
11430         unsigned Pos;
11431         QualType ParmTy;
11432         if (isa<CXXThisExpr>(E)) {
11433           Pos = ParamPositions[FD];
11434           ParmTy = E->getType();
11435         } else {
11436           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11437                                 ->getCanonicalDecl();
11438           Pos = ParamPositions[PVD];
11439           ParmTy = PVD->getType();
11440         }
11441         ParamAttrs[Pos].Alignment =
11442             (*NI)
11443                 ? (*NI)->EvaluateKnownConstInt(C)
11444                 : llvm::APSInt::getUnsigned(
11445                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11446                           .getQuantity());
11447         ++NI;
11448       }
11449       // Mark linear parameters.
11450       auto SI = Attr->steps_begin();
11451       auto MI = Attr->modifiers_begin();
11452       for (const Expr *E : Attr->linears()) {
11453         E = E->IgnoreParenImpCasts();
11454         unsigned Pos;
11455         // Rescaling factor needed to compute the linear parameter
11456         // value in the mangled name.
11457         unsigned PtrRescalingFactor = 1;
11458         if (isa<CXXThisExpr>(E)) {
11459           Pos = ParamPositions[FD];
11460         } else {
11461           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11462                                 ->getCanonicalDecl();
11463           Pos = ParamPositions[PVD];
11464           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11465             PtrRescalingFactor = CGM.getContext()
11466                                      .getTypeSizeInChars(P->getPointeeType())
11467                                      .getQuantity();
11468         }
11469         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11470         ParamAttr.Kind = Linear;
11471         // Assuming a stride of 1, for `linear` without modifiers.
11472         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11473         if (*SI) {
11474           Expr::EvalResult Result;
11475           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11476             if (const auto *DRE =
11477                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11478               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
11479                 ParamAttr.Kind = LinearWithVarStride;
11480                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
11481                     ParamPositions[StridePVD->getCanonicalDecl()]);
11482               }
11483             }
11484           } else {
11485             ParamAttr.StrideOrArg = Result.Val.getInt();
11486           }
11487         }
11488         // If we are using a linear clause on a pointer, we need to
11489         // rescale the value of linear_step with the byte size of the
11490         // pointee type.
11491         if (Linear == ParamAttr.Kind)
11492           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11493         ++SI;
11494         ++MI;
11495       }
11496       llvm::APSInt VLENVal;
11497       SourceLocation ExprLoc;
11498       const Expr *VLENExpr = Attr->getSimdlen();
11499       if (VLENExpr) {
11500         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11501         ExprLoc = VLENExpr->getExprLoc();
11502       }
11503       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11504       if (CGM.getTriple().isX86()) {
11505         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11506       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11507         unsigned VLEN = VLENVal.getExtValue();
11508         StringRef MangledName = Fn->getName();
11509         if (CGM.getTarget().hasFeature("sve"))
11510           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11511                                          MangledName, 's', 128, Fn, ExprLoc);
11512         if (CGM.getTarget().hasFeature("neon"))
11513           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11514                                          MangledName, 'n', 128, Fn, ExprLoc);
11515       }
11516     }
11517     FD = FD->getPreviousDecl();
11518   }
11519 }
11520 
11521 namespace {
11522 /// Cleanup action for doacross support.
11523 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11524 public:
11525   static const int DoacrossFinArgs = 2;
11526 
11527 private:
11528   llvm::FunctionCallee RTLFn;
11529   llvm::Value *Args[DoacrossFinArgs];
11530 
11531 public:
11532   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11533                     ArrayRef<llvm::Value *> CallArgs)
11534       : RTLFn(RTLFn) {
11535     assert(CallArgs.size() == DoacrossFinArgs);
11536     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11537   }
11538   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11539     if (!CGF.HaveInsertPoint())
11540       return;
11541     CGF.EmitRuntimeCall(RTLFn, Args);
11542   }
11543 };
11544 } // namespace
11545 
11546 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
11547                                        const OMPLoopDirective &D,
11548                                        ArrayRef<Expr *> NumIterations) {
11549   if (!CGF.HaveInsertPoint())
11550     return;
11551 
11552   ASTContext &C = CGM.getContext();
11553   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
11554   RecordDecl *RD;
11555   if (KmpDimTy.isNull()) {
11556     // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
11557     //  kmp_int64 lo; // lower
11558     //  kmp_int64 up; // upper
11559     //  kmp_int64 st; // stride
11560     // };
11561     RD = C.buildImplicitRecord("kmp_dim");
11562     RD->startDefinition();
11563     addFieldToRecordDecl(C, RD, Int64Ty);
11564     addFieldToRecordDecl(C, RD, Int64Ty);
11565     addFieldToRecordDecl(C, RD, Int64Ty);
11566     RD->completeDefinition();
11567     KmpDimTy = C.getRecordType(RD);
11568   } else {
11569     RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
11570   }
11571   llvm::APInt Size(/*numBits=*/32, NumIterations.size());
11572   QualType ArrayTy =
11573       C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);
11574 
11575   Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
11576   CGF.EmitNullInitialization(DimsAddr, ArrayTy);
11577   enum { LowerFD = 0, UpperFD, StrideFD };
11578   // Fill dims with data.
11579   for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
11580     LValue DimsLVal = CGF.MakeAddrLValue(
11581         CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
11582     // dims.upper = num_iterations;
11583     LValue UpperLVal = CGF.EmitLValueForField(
11584         DimsLVal, *std::next(RD->field_begin(), UpperFD));
11585     llvm::Value *NumIterVal = CGF.EmitScalarConversion(
11586         CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
11587         Int64Ty, NumIterations[I]->getExprLoc());
11588     CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
11589     // dims.stride = 1;
11590     LValue StrideLVal = CGF.EmitLValueForField(
11591         DimsLVal, *std::next(RD->field_begin(), StrideFD));
11592     CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
11593                           StrideLVal);
11594   }
11595 
11596   // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
11597   // kmp_int32 num_dims, struct kmp_dim * dims);
11598   llvm::Value *Args[] = {
11599       emitUpdateLocation(CGF, D.getBeginLoc()),
11600       getThreadID(CGF, D.getBeginLoc()),
11601       llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
11602       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11603           CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
11604           CGM.VoidPtrTy)};
11605 
11606   llvm::FunctionCallee RTLFn =
11607       createRuntimeFunction(OMPRTL__kmpc_doacross_init);
11608   CGF.EmitRuntimeCall(RTLFn, Args);
11609   llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
11610       emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
11611   llvm::FunctionCallee FiniRTLFn =
11612       createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
11613   CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11614                                              llvm::makeArrayRef(FiniArgs));
11615 }
11616 
11617 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11618                                           const OMPDependClause *C) {
11619   QualType Int64Ty =
11620       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11621   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11622   QualType ArrayTy = CGM.getContext().getConstantArrayType(
11623       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
11624   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11625   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11626     const Expr *CounterVal = C->getLoopData(I);
11627     assert(CounterVal);
11628     llvm::Value *CntVal = CGF.EmitScalarConversion(
11629         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11630         CounterVal->getExprLoc());
11631     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11632                           /*Volatile=*/false, Int64Ty);
11633   }
11634   llvm::Value *Args[] = {
11635       emitUpdateLocation(CGF, C->getBeginLoc()),
11636       getThreadID(CGF, C->getBeginLoc()),
11637       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
11638   llvm::FunctionCallee RTLFn;
11639   if (C->getDependencyKind() == OMPC_DEPEND_source) {
11640     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
11641   } else {
11642     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
11643     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
11644   }
11645   CGF.EmitRuntimeCall(RTLFn, Args);
11646 }
11647 
11648 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11649                                llvm::FunctionCallee Callee,
11650                                ArrayRef<llvm::Value *> Args) const {
11651   assert(Loc.isValid() && "Outlined function call location must be valid.");
11652   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11653 
11654   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11655     if (Fn->doesNotThrow()) {
11656       CGF.EmitNounwindRuntimeCall(Fn, Args);
11657       return;
11658     }
11659   }
11660   CGF.EmitRuntimeCall(Callee, Args);
11661 }
11662 
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  // Default implementation: forward directly to the generic call emitter.
  // Subclasses may override to adapt arguments for a specific target.
  emitCall(CGF, Loc, OutlinedFn, Args);
}
11668 
11669 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11670   if (const auto *FD = dyn_cast<FunctionDecl>(D))
11671     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11672       HasEmittedDeclareTargetRegion = true;
11673 }
11674 
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  // In the base (host) runtime the native and target parameters coincide,
  // so TargetParam is ignored and the native parameter's address is used.
  return CGF.GetAddrOfLocalVar(NativeParam);
}
11680 
11681 namespace {
11682 /// Cleanup action for allocate support.
11683 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
11684 public:
11685   static const int CleanupArgs = 3;
11686 
11687 private:
11688   llvm::FunctionCallee RTLFn;
11689   llvm::Value *Args[CleanupArgs];
11690 
11691 public:
11692   OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
11693                        ArrayRef<llvm::Value *> CallArgs)
11694       : RTLFn(RTLFn) {
11695     assert(CallArgs.size() == CleanupArgs &&
11696            "Size of arguments does not match.");
11697     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11698   }
11699   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11700     if (!CGF.HaveInsertPoint())
11701       return;
11702     CGF.EmitRuntimeCall(RTLFn, Args);
11703   }
11704 };
11705 } // namespace
11706 
11707 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
11708                                                    const VarDecl *VD) {
11709   if (!VD)
11710     return Address::invalid();
11711   const VarDecl *CVD = VD->getCanonicalDecl();
11712   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
11713     return Address::invalid();
11714   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
11715   // Use the default allocation.
11716   if ((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
11717        AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
11718       !AA->getAllocator())
11719     return Address::invalid();
11720   llvm::Value *Size;
11721   CharUnits Align = CGM.getContext().getDeclAlign(CVD);
11722   if (CVD->getType()->isVariablyModifiedType()) {
11723     Size = CGF.getTypeSize(CVD->getType());
11724     // Align the size: ((size + align - 1) / align) * align
11725     Size = CGF.Builder.CreateNUWAdd(
11726         Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
11727     Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
11728     Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
11729   } else {
11730     CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
11731     Size = CGM.getSize(Sz.alignTo(Align));
11732   }
11733   llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
11734   assert(AA->getAllocator() &&
11735          "Expected allocator expression for non-default allocator.");
11736   llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
11737   // According to the standard, the original allocator type is a enum (integer).
11738   // Convert to pointer type, if required.
11739   if (Allocator->getType()->isIntegerTy())
11740     Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
11741   else if (Allocator->getType()->isPointerTy())
11742     Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
11743                                                                 CGM.VoidPtrTy);
11744   llvm::Value *Args[] = {ThreadID, Size, Allocator};
11745 
11746   llvm::Value *Addr =
11747       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args,
11748                           getName({CVD->getName(), ".void.addr"}));
11749   llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
11750                                                               Allocator};
11751   llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);
11752 
11753   CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11754                                                 llvm::makeArrayRef(FiniArgs));
11755   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11756       Addr,
11757       CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
11758       getName({CVD->getName(), ".addr"}));
11759   return Address(Addr, Align);
11760 }
11761 
11762 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11763     CodeGenModule &CGM, const OMPLoopDirective &S)
11764     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11765   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11766   if (!NeedToPush)
11767     return;
11768   NontemporalDeclsSet &DS =
11769       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11770   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11771     for (const Stmt *Ref : C->private_refs()) {
11772       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11773       const ValueDecl *VD;
11774       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11775         VD = DRE->getDecl();
11776       } else {
11777         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11778         assert((ME->isImplicitCXXThis() ||
11779                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11780                "Expected member of current class.");
11781         VD = ME->getMemberDecl();
11782       }
11783       DS.insert(VD);
11784     }
11785   }
11786 }
11787 
11788 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11789   if (!NeedToPush)
11790     return;
11791   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11792 }
11793 
11794 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11795   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11796 
11797   return llvm::any_of(
11798       CGM.getOpenMPRuntime().NontemporalDeclsStack,
11799       [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
11800 }
11801 
11802 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11803     const OMPExecutableDirective &S,
11804     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11805     const {
11806   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11807   // Vars in target/task regions must be excluded completely.
11808   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11809       isOpenMPTaskingDirective(S.getDirectiveKind())) {
11810     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11811     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11812     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11813     for (const CapturedStmt::Capture &Cap : CS->captures()) {
11814       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11815         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11816     }
11817   }
11818   // Exclude vars in private clauses.
11819   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11820     for (const Expr *Ref : C->varlists()) {
11821       if (!Ref->getType()->isScalarType())
11822         continue;
11823       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11824       if (!DRE)
11825         continue;
11826       NeedToCheckForLPCs.insert(DRE->getDecl());
11827     }
11828   }
11829   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11830     for (const Expr *Ref : C->varlists()) {
11831       if (!Ref->getType()->isScalarType())
11832         continue;
11833       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11834       if (!DRE)
11835         continue;
11836       NeedToCheckForLPCs.insert(DRE->getDecl());
11837     }
11838   }
11839   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11840     for (const Expr *Ref : C->varlists()) {
11841       if (!Ref->getType()->isScalarType())
11842         continue;
11843       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11844       if (!DRE)
11845         continue;
11846       NeedToCheckForLPCs.insert(DRE->getDecl());
11847     }
11848   }
11849   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11850     for (const Expr *Ref : C->varlists()) {
11851       if (!Ref->getType()->isScalarType())
11852         continue;
11853       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11854       if (!DRE)
11855         continue;
11856       NeedToCheckForLPCs.insert(DRE->getDecl());
11857     }
11858   }
11859   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11860     for (const Expr *Ref : C->varlists()) {
11861       if (!Ref->getType()->isScalarType())
11862         continue;
11863       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11864       if (!DRE)
11865         continue;
11866       NeedToCheckForLPCs.insert(DRE->getDecl());
11867     }
11868   }
11869   for (const Decl *VD : NeedToCheckForLPCs) {
11870     for (const LastprivateConditionalData &Data :
11871          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
11872       if (Data.DeclToUniqueName.count(VD) > 0) {
11873         if (!Data.Disabled)
11874           NeedToAddForLPCsAsDisabled.insert(VD);
11875         break;
11876       }
11877     }
11878   }
11879 }
11880 
11881 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11882     CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
11883     : CGM(CGF.CGM),
11884       Action((CGM.getLangOpts().OpenMP >= 50 &&
11885               llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
11886                            [](const OMPLastprivateClause *C) {
11887                              return C->getKind() ==
11888                                     OMPC_LASTPRIVATE_conditional;
11889                            }))
11890                  ? ActionToDo::PushAsLastprivateConditional
11891                  : ActionToDo::DoNotPush) {
11892   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11893   if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
11894     return;
11895   assert(Action == ActionToDo::PushAsLastprivateConditional &&
11896          "Expected a push action.");
11897   LastprivateConditionalData &Data =
11898       CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11899   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11900     if (C->getKind() != OMPC_LASTPRIVATE_conditional)
11901       continue;
11902 
11903     for (const Expr *Ref : C->varlists()) {
11904       Data.DeclToUniqueName.insert(std::make_pair(
11905           cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
11906           SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
11907     }
11908   }
11909   Data.IVLVal = IVLVal;
11910   Data.Fn = CGF.CurFn;
11911 }
11912 
11913 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11914     CodeGenFunction &CGF, const OMPExecutableDirective &S)
11915     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
11916   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11917   if (CGM.getLangOpts().OpenMP < 50)
11918     return;
11919   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
11920   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
11921   if (!NeedToAddForLPCsAsDisabled.empty()) {
11922     Action = ActionToDo::DisableLastprivateConditional;
11923     LastprivateConditionalData &Data =
11924         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11925     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
11926       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
11927     Data.Fn = CGF.CurFn;
11928     Data.Disabled = true;
11929   }
11930 }
11931 
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  // Named constructor: builds an RAII object through the constructor that
  // disables lastprivate conditional analysis within the region of \p S.
  return LastprivateConditionalRAII(CGF, S);
}
11937 
11938 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
11939   if (CGM.getLangOpts().OpenMP < 50)
11940     return;
11941   if (Action == ActionToDo::DisableLastprivateConditional) {
11942     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11943            "Expected list of disabled private vars.");
11944     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11945   }
11946   if (Action == ActionToDo::PushAsLastprivateConditional) {
11947     assert(
11948         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11949         "Expected list of lastprivate conditional vars.");
11950     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11951   }
11952 }
11953 
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  // Per-function cache of the wrapper records built for lastprivate
  // conditional variables.
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // First use of VD in this function: build a record holding the private
    // copy plus a char flag marking whether the copy has been assigned.
    // (Note: "lasprivate" is a long-standing spelling in the emitted record
    // name; kept as-is since it appears in the generated IR.)
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    // Cached: reuse the record type, fields and base lvalue built earlier.
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // Initialize the flag field to 0 ("not assigned yet").
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  // Hand back the address of the private copy itself.
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}
11988 
namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  // Active lastprivate-conditional regions, innermost last.
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  // Results of a successful search; meaningful only after Visit() returns
  // true.
  const Expr *FoundE = nullptr;
  const Decl *FoundD = nullptr;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn = nullptr;
  // NOTE(review): Loc is never written or read in this class — looks dead.
  SourceLocation Loc;

public:
  // Direct reference to a variable: match against registered decls, searching
  // from the innermost region outwards.
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      // A region that registered the decl but is disabled suppresses the
      // update entirely.
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    // True only if this expression is the one just recorded.
    return FoundE == E;
  }
  // Member of 'this' (possibly wrapped in casts/parens) registered as
  // lastprivate conditional.
  bool VisitMemberExpr(const MemberExpr *E) {
    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  // Generic statement: recurse, but only into glvalue expression children
  // (rvalue children cannot name the assigned variable).
  bool VisitStmt(const Stmt *S) {
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  // Returns (FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) from the last
  // successful search.
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
  }
};
} // namespace
12059 
// Emits, keyed by UniqueDeclName and guarded by a critical region (unless
// compiling in OpenMPSimd mode, where no parallelism exists):
//   if (last_iv <= iv) { last_iv = iv; last_a = priv_a; }
// where last_iv/last_a are internal module-level globals created on demand.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  // IVVal is captured by value: it was loaded outside the critical section.
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
12146 
12147 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12148                                                          const Expr *LHS) {
12149   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12150     return;
12151   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12152   if (!Checker.Visit(LHS))
12153     return;
12154   const Expr *FoundE;
12155   const Decl *FoundD;
12156   StringRef UniqueDeclName;
12157   LValue IVLVal;
12158   llvm::Function *FoundFn;
12159   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12160       Checker.getFoundData();
12161   if (FoundFn != CGF.CurFn) {
12162     // Special codegen for inner parallel regions.
12163     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12164     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12165     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12166            "Lastprivate conditional is not found in outer region.");
12167     QualType StructTy = std::get<0>(It->getSecond());
12168     const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
12169     LValue PrivLVal = CGF.EmitLValue(FoundE);
12170     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12171         PrivLVal.getAddress(CGF),
12172         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
12173     LValue BaseLVal =
12174         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12175     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12176     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12177                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12178                         FiredLVal, llvm::AtomicOrdering::Unordered,
12179                         /*IsVolatile=*/true, /*isInit=*/false);
12180     return;
12181   }
12182 
12183   // Private address of the lastprivate conditional in the current context.
12184   // priv_a
12185   LValue LVal = CGF.EmitLValue(FoundE);
12186   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12187                                    FoundE->getExprLoc());
12188 }
12189 
12190 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12191     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12192     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12193   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12194     return;
12195   auto Range = llvm::reverse(LastprivateConditionalStack);
12196   auto It = llvm::find_if(
12197       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12198   if (It == Range.end() || It->Fn != CGF.CurFn)
12199     return;
12200   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12201   assert(LPCI != LastprivateConditionalToTypes.end() &&
12202          "Lastprivates must be registered already.");
12203   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12204   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12205   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12206   for (const auto &Pair : It->DeclToUniqueName) {
12207     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12208     if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
12209       continue;
12210     auto I = LPCI->getSecond().find(Pair.first);
12211     assert(I != LPCI->getSecond().end() &&
12212            "Lastprivate must be rehistered already.");
12213     // bool Cmp = priv_a.Fired != 0;
12214     LValue BaseLVal = std::get<3>(I->getSecond());
12215     LValue FiredLVal =
12216         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12217     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12218     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12219     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12220     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12221     // if (Cmp) {
12222     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12223     CGF.EmitBlock(ThenBB);
12224     Address Addr = CGF.GetAddrOfLocalVar(VD);
12225     LValue LVal;
12226     if (VD->getType()->isReferenceType())
12227       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12228                                            AlignmentSource::Decl);
12229     else
12230       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12231                                 AlignmentSource::Decl);
12232     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12233                                      D.getBeginLoc());
12234     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12235     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12236     // }
12237   }
12238 }
12239 
// Final step of a lastprivate conditional: copy the recorded "last" value
// (internal global named by the variable's unique name) back into the
// private copy \p PrivLVal. No-op if the global was never created, i.e. the
// variable was never conditionally updated in the region.
void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  // Expects the innermost region on the stack to have registered VD.
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeAddrLValue(
      GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}
12258 
// CGOpenMPSIMDRuntime overrides: requesting outlined parallel/teams/task
// functions is not supported in SIMD-only mode; each aborts via
// llvm_unreachable.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12278 
// CGOpenMPSIMDRuntime overrides: parallel/critical/master/taskyield/
// taskgroup/single/ordered/barrier runtime calls are not supported in
// SIMD-only mode; each aborts via llvm_unreachable.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12333 
// CGOpenMPSIMDRuntime overrides: worksharing-loop runtime entry points
// (dispatch/static init, ordered, finish, next) are not supported in
// SIMD-only mode; each aborts via llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12373 
// CGOpenMPSIMDRuntime overrides: num_threads/proc_bind clauses,
// threadprivate handling and flush are not supported in SIMD-only mode;
// each aborts via llvm_unreachable.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12410 
// CGOpenMPSIMDRuntime overrides: task and taskloop runtime calls are not
// supported in SIMD-only mode; each aborts via llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12426 
// Only simple reductions are expected in SIMD-only mode (asserted); the
// actual emission is delegated to the base-class implementation.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
12435 
// CGOpenMPSIMDRuntime overrides: task reductions, taskwait and cancellation
// are not supported in SIMD-only mode; each aborts via llvm_unreachable.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12478 
// CGOpenMPSIMDRuntime overrides: target offloading is not supported in
// SIMD-only mode; outlining and call emission abort via llvm_unreachable,
// and the emitTarget* predicates report that nothing was handled here.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// Always false: no SIMD-only-specific emission is performed for globals, so
// the caller proceeds with normal emission.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
12507 
// CGOpenMPSIMDRuntime overrides: teams, target data and doacross runtime
// calls are not supported in SIMD-only mode; each aborts via
// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12545 
// CGOpenMPSIMDRuntime overrides: parameter translation for target regions is
// not supported in SIMD-only mode; each aborts via llvm_unreachable.
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12558