1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/Attr.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/OpenMPClause.h"
21 #include "clang/AST/StmtOpenMP.h"
22 #include "clang/AST/StmtVisitor.h"
23 #include "clang/Basic/BitmaskEnum.h"
24 #include "clang/Basic/FileManager.h"
25 #include "clang/Basic/OpenMPKinds.h"
26 #include "clang/Basic/SourceManager.h"
27 #include "clang/CodeGen/ConstantInitBuilder.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/SetOperations.h"
30 #include "llvm/ADT/StringExtras.h"
31 #include "llvm/Bitcode/BitcodeReader.h"
32 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
33 #include "llvm/IR/Constants.h"
34 #include "llvm/IR/DerivedTypes.h"
35 #include "llvm/IR/GlobalValue.h"
36 #include "llvm/IR/Value.h"
37 #include "llvm/Support/AtomicOrdering.h"
38 #include "llvm/Support/Format.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include <cassert>
41 #include <numeric>
42 
43 using namespace clang;
44 using namespace CodeGen;
45 using namespace llvm::omp;
46 
47 namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Constructor for regions emitted from a captured statement.
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Constructor for regions with no associated captured statement
  /// (e.g. inlined regions that reuse the enclosing function's captures).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit a task switching point for untied tasks; no-op by default,
  /// overridden by task regions.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// Returns which CGOpenMPRegionKind variant this region is.
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// Returns the OpenMP directive this region was created for.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// True if a 'cancel' construct may appear inside this region.
  bool hasCancel() const { return HasCancel; }

  /// RTTI support: every CGCapturedStmtInfo tagged CR_OpenMP is one of the
  /// CGOpenMPRegionInfo subclasses.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  /// Which kind of OpenMP region this is (see CGOpenMPRegionKind).
  CGOpenMPRegionKind RegionKind;
  /// Callback that emits the code for the region body.
  RegionCodeGenTy CodeGen;
  /// OpenMP directive kind that created this region.
  OpenMPDirectiveKind Kind;
  /// Whether cancellation is possible inside the region.
  bool HasCancel;
};
109 
/// API for captured statement code generation in OpenMP constructs.
/// Used for regions outlined into a separate helper function, e.g. the body
/// of a standalone 'parallel' directive.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// RTTI support: matches only parallel-outlined regions.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name used for the outlined capture helper function.
  StringRef HelperName;
};
142 
/// API for captured statement code generation in OpenMP constructs.
/// Used for regions outlined for a standalone 'task' directive.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action that emits the dispatch machinery for 'untied' tasks:
  /// the task body is split into parts, and a switch over the stored part id
  /// resumes the task at the point where it last yielded.
  class UntiedTaskActionTy final : public PrePostActionTy {
    /// True for untied tasks (note: constructed from the inverse, Tied).
    bool Untied;
    /// Parameter holding a pointer to the task's current part id.
    const VarDecl *PartIDVar;
    /// Extra code emitted at every task switching point.
    const RegionCodeGenTy UntiedCodeGen;
    /// Switch over the part id; a case is added per switching point.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        // Part id 0 resumes at the very beginning of the task body.
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit one task switching point: store the next part id (the current
    /// number of switch cases), run the user-provided untied codegen, exit
    /// through cleanups, and register the resume block as a new switch case.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of task parts emitted so far (== number of switch cases).
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  /// Forward to the untied-task action (no-op for tied tasks).
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  /// RTTI support: matches only task-outlined regions.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
231 
/// API for inlined captured statement code generation in OpenMP
/// constructs. Most queries are forwarded to the enclosing region's info
/// (OuterRegionInfo) when one exists.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  // Forward the context parameter value to the enclosing region.
  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  /// Field for the captured 'this', if the enclosing region has one.
  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  /// NOTE: unlike the other forwarders this consults the raw OldCSI rather
  /// than the cached CGOpenMPRegionInfo, so it also answers when the previous
  /// CapturedStmtInfo is not an OpenMP region.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  /// Forward untied-task switching to the enclosing region, if any.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  /// The CapturedStmtInfo that was active before this inlined region.
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  /// RTTI support: matches only inlined regions.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to an OpenMP region info, or null if it is not one.
  CGOpenMPRegionInfo *OuterRegionInfo;
};
314 
/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of
/// the captured fields. The name of the target region has to be unique in a
/// given application so it is provided by the client, because only the client
/// has the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// RTTI support: matches only target regions.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Client-provided, application-unique name of the target region helper.
  StringRef HelperName;
};
343 
/// Placeholder RegionCodeGenTy callback for regions that only wrap
/// expressions; invoking it (i.e. trying to emit a body) is a logic error.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
347 /// API for generation of expressions captured in a innermost OpenMP
348 /// region.
349 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
350 public:
351   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
352       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
353                                   OMPD_unknown,
354                                   /*HasCancel=*/false),
355         PrivScope(CGF) {
356     // Make sure the globals captured in the provided statement are local by
357     // using the privatization logic. We assume the same variable is not
358     // captured more than once.
359     for (const auto &C : CS.captures()) {
360       if (!C.capturesVariable() && !C.capturesVariableByCopy())
361         continue;
362 
363       const VarDecl *VD = C.getCapturedVar();
364       if (VD->isLocalVarDeclOrParm())
365         continue;
366 
367       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
368                       /*RefersToEnclosingVariableOrCapture=*/false,
369                       VD->getType().getNonReferenceType(), VK_LValue,
370                       C.getLocation());
371       PrivScope.addPrivate(
372           VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
373     }
374     (void)PrivScope.Privatize();
375   }
376 
377   /// Lookup the captured field decl for a variable.
378   const FieldDecl *lookup(const VarDecl *VD) const override {
379     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
380       return FD;
381     return nullptr;
382   }
383 
384   /// Emit the captured statement body.
385   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
386     llvm_unreachable("No body for expressions");
387   }
388 
389   /// Get a variable or parameter for storing global thread id
390   /// inside OpenMP construct.
391   const VarDecl *getThreadIDVariable() const override {
392     llvm_unreachable("No thread id for expressions");
393   }
394 
395   /// Get the name of the capture helper.
396   StringRef getHelperName() const override {
397     llvm_unreachable("No helper name for expressions");
398   }
399 
400   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
401 
402 private:
403   /// Private scope to capture global variables.
404   CodeGenFunction::OMPPrivateScope PrivScope;
405 };
406 
407 /// RAII for emitting code of OpenMP constructs.
408 class InlinedOpenMPRegionRAII {
409   CodeGenFunction &CGF;
410   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
411   FieldDecl *LambdaThisCaptureField = nullptr;
412   const CodeGen::CGBlockInfo *BlockInfo = nullptr;
413 
414 public:
415   /// Constructs region for combined constructs.
416   /// \param CodeGen Code generation sequence for combined directives. Includes
417   /// a list of functions used for code generation of implicitly inlined
418   /// regions.
419   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
420                           OpenMPDirectiveKind Kind, bool HasCancel)
421       : CGF(CGF) {
422     // Start emission for the construct.
423     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
424         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
425     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
426     LambdaThisCaptureField = CGF.LambdaThisCaptureField;
427     CGF.LambdaThisCaptureField = nullptr;
428     BlockInfo = CGF.BlockInfo;
429     CGF.BlockInfo = nullptr;
430   }
431 
432   ~InlinedOpenMPRegionRAII() {
433     // Restore original CapturedStmtInfo only if we're done with code emission.
434     auto *OldCSI =
435         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
436     delete CGF.CapturedStmtInfo;
437     CGF.CapturedStmtInfo = OldCSI;
438     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
439     CGF.LambdaThisCaptureField = LambdaThisCaptureField;
440     CGF.BlockInfo = BlockInfo;
441   }
442 };
443 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumeric elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  /// Note: shares the value 0x40 with OMP_IDENT_BARRIER_IMPL, matching kmp.h.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
472 
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

/// Reserved device id values for offloading entry points.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
498 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
539 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// These occupy separate high bits (1 << 29 and 1 << 30), distinct from
  /// the schedule values above.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
571 
572 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
573 /// region.
574 class CleanupTy final : public EHScopeStack::Cleanup {
575   PrePostActionTy *Action;
576 
577 public:
578   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
579   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
580     if (!CGF.HaveInsertPoint())
581       return;
582     Action->Exit(CGF);
583   }
584 };
585 
586 } // anonymous namespace
587 
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  // Cleanups registered while the region is emitted run when this scope is
  // destroyed at the end of the call.
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    // Push the action's Exit part as a cleanup so it runs on both normal and
    // exceptional paths out of the region, then emit with the action hooked
    // into the callback.
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    // No user-provided action: pass a default-constructed (no-op) one.
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}
598 
599 /// Check if the combiner is a call to UDR combiner and if it is so return the
600 /// UDR decl used for reduction.
601 static const OMPDeclareReductionDecl *
602 getReductionInit(const Expr *ReductionOp) {
603   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
604     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
605       if (const auto *DRE =
606               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
607         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
608           return DRD;
609   return nullptr;
610 }
611 
/// Emit the initialization of a reduction private copy.
/// \param DRD Declare-reduction declaration (must be non-null).
/// \param InitOp Initializer operation from the clause; only emitted when
/// \p DRD has an explicit initializer.
/// \param Private Address of the private copy being initialized.
/// \param Original Address of the original (shared) variable.
/// \param Ty Type of the value being initialized.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // The UDR has an explicit initializer: emit InitOp as a call to the
    // generated initializer function, with the variables referenced by its
    // two arguments redirected to Private and Original respectively.
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // Substitute the opaque callee with the second function of the UDR pair
    // and emit the call for its side effects only.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No explicit initializer: copy the type's zero value from a private
    // constant global into the private copy.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    // Load the zero value in the form matching the type's evaluation kind.
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress(CGF));
      break;
    }
    // Wrap the loaded value in an opaque expression so EmitAnyExprToMem can
    // store it into the private copy.
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
663 
/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, \p Init is emitted through
/// emitInitWithReductionInitializer; otherwise it is emitted as a plain
/// initializer expression for each element.
/// \param Init Initial expression of array.
/// \param DRD Declare-reduction declaration, or null. When non-null the
/// source array is walked in lockstep and each source element is passed as
/// the Original address to emitInitWithReductionInitializer.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes track the current destination (and, for UDRs, source) element.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Scope the cleanups so that those produced by one element's
    // initialization do not leak into the next iteration.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
752 
753 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
754   return CGF.EmitOMPSharedLValue(E);
755 }
756 
757 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
758                                             const Expr *E) {
759   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
760     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
761   return LValue();
762 }
763 
764 void ReductionCodeGen::emitAggregateInitialization(
765     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
766     const OMPDeclareReductionDecl *DRD) {
767   // Emit VarDecl with copy init for arrays.
768   // Get the address of the original variable captured in current
769   // captured region.
770   const auto *PrivateVD =
771       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
772   bool EmitDeclareReductionInit =
773       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
774   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
775                        EmitDeclareReductionInit,
776                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
777                                                 : PrivateVD->getInit(),
778                        DRD, SharedLVal.getAddress(CGF));
779 }
780 
781 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
782                                    ArrayRef<const Expr *> Origs,
783                                    ArrayRef<const Expr *> Privates,
784                                    ArrayRef<const Expr *> ReductionOps) {
785   ClausesData.reserve(Shareds.size());
786   SharedAddresses.reserve(Shareds.size());
787   Sizes.reserve(Shareds.size());
788   BaseDecls.reserve(Shareds.size());
789   const auto *IOrig = Origs.begin();
790   const auto *IPriv = Privates.begin();
791   const auto *IRed = ReductionOps.begin();
792   for (const Expr *Ref : Shareds) {
793     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
794     std::advance(IOrig, 1);
795     std::advance(IPriv, 1);
796     std::advance(IRed, 1);
797   }
798 }
799 
800 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
801   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
802          "Number of generated lvalues must be exactly N.");
803   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
804   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
805   SharedAddresses.emplace_back(First, Second);
806   if (ClausesData[N].Shared == ClausesData[N].Ref) {
807     OrigAddresses.emplace_back(First, Second);
808   } else {
809     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
810     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
811     OrigAddresses.emplace_back(First, Second);
812   }
813 }
814 
/// Record the size of reduction item \p N (total size in chars, plus element
/// count for variably-sized items) into Sizes, and for variably modified
/// private types emit the type with its VLA size expression bound.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-sized item: the size is a compile-time-known value and no
    // element count is needed (second member left null).
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;        // Number of elements.
  llvm::Value *SizeInChars; // Total size in chars.
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count = (UB - LB) + 1: the section's bounds are inclusive,
    // hence the NUW add of 1 after the pointer difference.
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Variably modified (non-section) item: the type size is available in
    // chars; derive the element count by exact division.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the VLA size expression to the computed element count so that
  // EmitVariablyModifiedType can materialize the private copy's type.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
851 
852 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
853                                          llvm::Value *Size) {
854   const auto *PrivateVD =
855       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
856   QualType PrivateType = PrivateVD->getType();
857   if (!PrivateType->isVariablyModifiedType()) {
858     assert(!Size && !Sizes[N].second &&
859            "Size should be nullptr for non-variably modified reduction "
860            "items.");
861     return;
862   }
863   CodeGenFunction::OpaqueValueMapping OpaqueMap(
864       CGF,
865       cast<OpaqueValueExpr>(
866           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
867       RValue::get(Size));
868   CGF.EmitVariablyModifiedType(PrivateType);
869 }
870 
/// Emit initialization of the private copy of reduction item \p N.
/// \param PrivateAddr Address of the private copy.
/// \param SharedLVal Lvalue of the original shared item (used as the source
/// for a user-defined reduction initializer).
/// \param DefaultInit Callback that may emit default initialization; returns
/// true if it emitted anything.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Re-type both addresses to the memory representations of the private and
  // shared types so the initializers see correctly typed storage.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Array item: initialize element-wise (possibly via the UDR initializer).
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar item with a user-defined reduction initializer.
    (void)DefaultInit(CGF);
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // Fall back to the private variable's own non-trivial initializer when
    // the DefaultInit callback emitted nothing.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
904 
905 bool ReductionCodeGen::needCleanups(unsigned N) {
906   const auto *PrivateVD =
907       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
908   QualType PrivateType = PrivateVD->getType();
909   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
910   return DTorKind != QualType::DK_none;
911 }
912 
913 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
914                                     Address PrivateAddr) {
915   const auto *PrivateVD =
916       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
917   QualType PrivateType = PrivateVD->getType();
918   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
919   if (needCleanups(N)) {
920     PrivateAddr = CGF.Builder.CreateElementBitCast(
921         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
922     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
923   }
924 }
925 
/// Starting from \p BaseLV of type \p BaseTy, load through pointer/reference
/// layers until an lvalue of type \p ElTy is reached, then cast the resulting
/// address to the memory representation of \p ElTy.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      // Reference layer: wrap the current address as a reference lvalue and
      // load through it.
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  // Preserve base info and TBAA of the innermost lvalue while re-typing its
  // address to ElTy's memory type.
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
945 
/// Counterpart of loadToBegin for building an address: recreate the
/// pointer/reference chain from \p BaseTy down to \p ElTy out of fresh
/// temporaries, store \p Addr at the innermost level, and return the
/// outermost temporary (or \p Addr itself when no indirection is involved).
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();        // innermost temporary created
  Address TopTmp = Address::invalid();     // previously created level
  Address MostTopTmp = Address::invalid(); // outermost temporary
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // Allocate a temporary for this pointer level and link it into the chain
    // built so far: each outer temporary points at the next inner one.
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    // Store the final address into the innermost temporary and hand back the
    // outermost one, which callers dereference level by level.
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
973 
974 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
975   const VarDecl *OrigVD = nullptr;
976   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
977     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
978     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
979       Base = TempOASE->getBase()->IgnoreParenImpCasts();
980     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
981       Base = TempASE->getBase()->IgnoreParenImpCasts();
982     DE = cast<DeclRefExpr>(Base);
983     OrigVD = cast<VarDecl>(DE->getDecl());
984   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
985     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
986     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
987       Base = TempASE->getBase()->IgnoreParenImpCasts();
988     DE = cast<DeclRefExpr>(Base);
989     OrigVD = cast<VarDecl>(DE->getDecl());
990   }
991   return OrigVD;
992 }
993 
/// For reduction items expressed as array sections/subscripts, rebase the
/// private address so that applying the original expression's offsets to it
/// addresses the private storage: private += (section_begin - base_address).
/// Also records the base variable of item \p N in BaseDecls.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    // Address of the base variable, followed through pointer/reference
    // layers down to the element type of the shared item.
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    // Offset (in elements) of the base from the start of the section.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress(CGF).getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    // Rebuild the base variable's pointer chain around the adjusted pointer.
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  // Plain variable reference: no adjustment required.
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1019 
1020 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1021   const OMPDeclareReductionDecl *DRD =
1022       getReductionInit(ClausesData[N].ReductionOp);
1023   return DRD && DRD->getInitializer();
1024 }
1025 
1026 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1027   return CGF.EmitLoadOfPointerLValue(
1028       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1029       getThreadIDVariable()->getType()->castAs<PointerType>());
1030 }
1031 
/// Emit the region body inside a terminate scope, so that an exception
/// escaping the structured block terminates the program instead of unwinding
/// out of the region.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1044 
1045 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1046     CodeGenFunction &CGF) {
1047   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1048                             getThreadIDVariable()->getType(),
1049                             AlignmentSource::Decl);
1050 }
1051 
1052 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1053                                        QualType FieldTy) {
1054   auto *Field = FieldDecl::Create(
1055       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1056       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1057       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1058   Field->setAccess(AS_public);
1059   DC->addDecl(Field);
1060   return Field;
1061 }
1062 
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  ASTContext &C = CGM.getContext();
  // Build the ident_t type used to pass source-location information to the
  // OpenMP runtime. The field order below defines the layout the runtime
  // expects — do not reorder.
  RecordDecl *RD = C.buildImplicitRecord("ident_t");
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  RD->startDefinition();
  // reserved_1
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // flags
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_2
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_3
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // psource
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  RD->completeDefinition();
  IdentQTy = C.getRecordType(RD);
  IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
  // kmp_critical_name: an array of 8 i32 values used to name critical regions.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}
1090 
1091 void CGOpenMPRuntime::clear() {
1092   InternalVars.clear();
1093   // Clean non-target variable declarations possibly used only in debug info.
1094   for (const auto &Data : EmittedNonTargetVariables) {
1095     if (!Data.getValue().pointsToAliveValue())
1096       continue;
1097     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1098     if (!GV)
1099       continue;
1100     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1101       continue;
1102     GV->eraseFromParent();
1103   }
1104 }
1105 
1106 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1107   SmallString<128> Buffer;
1108   llvm::raw_svector_ostream OS(Buffer);
1109   StringRef Sep = FirstSeparator;
1110   for (StringRef Part : Parts) {
1111     OS << Sep << Part;
1112     Sep = Separator;
1113   }
1114   return std::string(OS.str());
1115 }
1116 
/// Emit the outlined helper for a 'declare reduction' combiner or initializer:
///   void .omp_combiner.(Ty *omp_out, Ty *omp_in)   (IsCombiner == true)
///   void .omp_initializer.(Ty *omp_priv, Ty *omp_orig) (IsCombiner == false)
/// \param CombinerInitializer Expression emitted in the body; may be null for
/// an initializer whose work is done via \p Out's own initializer instead.
/// \param In/\param Out The omp_in/omp_out (resp. omp_orig/omp_priv) decls
/// that are remapped onto the function parameters.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // These tiny helpers should always be inlined when optimizing.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // For an initializer, run omp_priv's own non-trivial initializer first.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1173 
/// Emit (at most once) the combiner and optional initializer helpers for the
/// user-defined reduction \p D and cache them in UDRMap. When \p CGF is
/// non-null, also associate \p D with the current function via FunctionUDRMap.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // Only call-style init emits Init in the helper body; otherwise the
    // initialization happens through omp_priv's own initializer, so pass null.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
1199 
1200 std::pair<llvm::Function *, llvm::Function *>
1201 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1202   auto I = UDRMap.find(D);
1203   if (I != UDRMap.end())
1204     return I->second;
1205   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1206   return UDRMap.lookup(D);
1207 }
1208 
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  /// Push a finalization callback for an OMPD_parallel region onto the
  /// OpenMPIRBuilder's stack (no-op when \p OMPBuilder is null).
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      // Temporarily move the builder to IP, then branch to the cancel
      // destination through any pending cleanups.
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI(
        {FiniCB, OMPD_parallel, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  /// Pop the callback pushed by the constructor, if any.
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
1254 
/// Outline the captured statement \p CS of a 'parallel'/'teams' directive into
/// a function the OpenMP runtime can invoke. \p ThreadIDVar is the captured
/// kmp_int32* thread-id parameter; \p CodeGen emits the region body.
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  // Determine whether any of the directive forms that can contain this
  // parallel region carries a 'cancel' construct; this drives cleanup setup.
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  //       parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}
1291 
1292 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1293     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1294     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1295   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1296   return emitParallelOrTeamsOutlinedFunction(
1297       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1298 }
1299 
1300 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1301     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1302     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1303   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1304   return emitParallelOrTeamsOutlinedFunction(
1305       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1306 }
1307 
/// Outline the body of a task/taskloop directive. For untied tasks, wires up
/// an action that re-enqueues the task via __kmpc_omp_task at scheduling
/// points; \p NumberOfParts receives the number of task parts in that case.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Callback emitting "__kmpc_omp_task(loc, tid, task_t*)" to resume an
  // untied task after a scheduling point.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Check each directive form that may carry a 'cancel' construct.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1354 
1355 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1356                              const RecordDecl *RD, const CGRecordLayout &RL,
1357                              ArrayRef<llvm::Constant *> Data) {
1358   llvm::StructType *StructTy = RL.getLLVMType();
1359   unsigned PrevIdx = 0;
1360   ConstantInitBuilder CIBuilder(CGM);
1361   auto DI = Data.begin();
1362   for (const FieldDecl *FD : RD->fields()) {
1363     unsigned Idx = RL.getLLVMFieldNo(FD);
1364     // Fill the alignment.
1365     for (unsigned I = PrevIdx; I < Idx; ++I)
1366       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1367     PrevIdx = Idx + 1;
1368     Fields.add(*DI);
1369     ++DI;
1370   }
1371 }
1372 
1373 template <class... As>
1374 static llvm::GlobalVariable *
1375 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1376                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1377                    As &&... Args) {
1378   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1379   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1380   ConstantInitBuilder CIBuilder(CGM);
1381   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1382   buildStructValue(Fields, CGM, RD, RL, Data);
1383   return Fields.finishAndCreateGlobal(
1384       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1385       std::forward<As>(Args)...);
1386 }
1387 
1388 template <typename T>
1389 static void
1390 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1391                                          ArrayRef<llvm::Constant *> Data,
1392                                          T &Parent) {
1393   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1394   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1395   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1396   buildStructValue(Fields, CGM, RD, RL, Data);
1397   Fields.finishAndAddTo(Parent);
1398 }
1399 
/// Return (creating and caching on first use) the default ident_t object used
/// when no meaningful source location is available, keyed by \p Flags plus the
/// target-specific reserved_2 flags.
Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  FlagsTy FlagsKey(Flags, Reserved2Flags);
  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
  if (!Entry) {
    if (!DefaultOpenMPPSource) {
      // Initialize default location for psource field of ident_t structure of
      // all ident_t objects. Format is ";file;function;line;column;;".
      // Taken from
      // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
      DefaultOpenMPPSource =
          CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
      DefaultOpenMPPSource =
          llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
    }

    // Field order matches the ident_t record built in the constructor:
    // reserved_1, flags, reserved_2, reserved_3, psource.
    llvm::Constant *Data[] = {
        llvm::ConstantInt::getNullValue(CGM.Int32Ty),
        llvm::ConstantInt::get(CGM.Int32Ty, Flags),
        llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
        llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
    llvm::GlobalValue *DefaultOpenMPLocation =
        createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
                           llvm::GlobalValue::PrivateLinkage);
    DefaultOpenMPLocation->setUnnamedAddr(
        llvm::GlobalValue::UnnamedAddr::Global);

    OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
  }
  return Address(Entry, Align);
}
1432 
1433 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1434                                              bool AtCurrentPoint) {
1435   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1436   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1437 
1438   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1439   if (AtCurrentPoint) {
1440     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1441         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1442   } else {
1443     Elem.second.ServiceInsertPt =
1444         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1445     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1446   }
1447 }
1448 
1449 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1450   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1451   if (Elem.second.ServiceInsertPt) {
1452     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1453     Elem.second.ServiceInsertPt = nullptr;
1454     Ptr->eraseFromParent();
1455   }
1456 }
1457 
1458 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1459                                                   SourceLocation Loc,
1460                                                   SmallString<128> &Buffer) {
1461   llvm::raw_svector_ostream OS(Buffer);
1462   // Build debug location
1463   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1464   OS << ";" << PLoc.getFilename() << ";";
1465   if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1466     OS << FD->getQualifiedNameAsString();
1467   OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1468   return OS.str();
1469 }
1470 
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  // Produces the ident_t* argument passed to kmpc runtime entry points,
  // describing \p Loc with the given flags.
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  // If the OpenMPIRBuilder is used we need to use it for all location handling
  // as the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
    return OMPBuilder.getOrCreateIdent(SrcLocStr, IdentFlag(Flags));
  }

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  // Reuse the per-function ident_t temporary if one was already created.
  Address LocValue = Address::invalid();
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, Align);

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // GetOpenMPThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    // Initialize the temporary from the default location; the memcpy is
    // emitted at the service insertion point near the function entry so the
    // initialized value is available to all later uses.
    if (!Elem.second.ServiceInsertPt)
      setLocThreadIdInsertPt(CGF);
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGF.getTypeSize(IdentQTy));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
  LValue PSource =
      CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));

  // The location string is cached per raw source-location encoding so each
  // distinct location gets a single global string.
  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer;
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}
1535 
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  // Returns the OpenMP global thread id for the current function, reusing a
  // cached value or an outlined region's thread-id parameter when possible.
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Only reuse the parameter when it is safe: either C++ exceptions with
      // a landing pad are not in play, or the thread-id pointer/load is in
      // the entry block or the current block (so the load is reachable).
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the runtime call at the service insertion point near the function
  // entry so the cached result is available to all later uses.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1603 
1604 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1605   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1606   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1607     clearLocThreadIdInsertPt(CGF);
1608     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1609   }
1610   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1611     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1612       UDRMap.erase(D);
1613     FunctionUDRMap.erase(CGF.CurFn);
1614   }
1615   auto I = FunctionUDMMap.find(CGF.CurFn);
1616   if (I != FunctionUDMMap.end()) {
1617     for(const auto *D : I->second)
1618       UDMMap.erase(D);
1619     FunctionUDMMap.erase(I);
1620   }
1621   LastprivateConditionalToTypes.erase(CGF.CurFn);
1622 }
1623 
1624 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1625   return IdentTy->getPointerTo();
1626 }
1627 
1628 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1629   if (!Kmpc_MicroTy) {
1630     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1631     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1632                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1633     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1634   }
1635   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1636 }
1637 
1638 llvm::FunctionCallee
1639 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
1640   assert((IVSize == 32 || IVSize == 64) &&
1641          "IV size is not compatible with the omp runtime");
1642   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1643                                             : "__kmpc_for_static_init_4u")
1644                                 : (IVSigned ? "__kmpc_for_static_init_8"
1645                                             : "__kmpc_for_static_init_8u");
1646   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1647   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1648   llvm::Type *TypeParams[] = {
1649     getIdentTyPointerTy(),                     // loc
1650     CGM.Int32Ty,                               // tid
1651     CGM.Int32Ty,                               // schedtype
1652     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1653     PtrTy,                                     // p_lower
1654     PtrTy,                                     // p_upper
1655     PtrTy,                                     // p_stride
1656     ITy,                                       // incr
1657     ITy                                        // chunk
1658   };
1659   auto *FnTy =
1660       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1661   return CGM.CreateRuntimeFunction(FnTy, Name);
1662 }
1663 
1664 llvm::FunctionCallee
1665 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1666   assert((IVSize == 32 || IVSize == 64) &&
1667          "IV size is not compatible with the omp runtime");
1668   StringRef Name =
1669       IVSize == 32
1670           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1671           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1672   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1673   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1674                                CGM.Int32Ty,           // tid
1675                                CGM.Int32Ty,           // schedtype
1676                                ITy,                   // lower
1677                                ITy,                   // upper
1678                                ITy,                   // stride
1679                                ITy                    // chunk
1680   };
1681   auto *FnTy =
1682       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1683   return CGM.CreateRuntimeFunction(FnTy, Name);
1684 }
1685 
1686 llvm::FunctionCallee
1687 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1688   assert((IVSize == 32 || IVSize == 64) &&
1689          "IV size is not compatible with the omp runtime");
1690   StringRef Name =
1691       IVSize == 32
1692           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1693           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1694   llvm::Type *TypeParams[] = {
1695       getIdentTyPointerTy(), // loc
1696       CGM.Int32Ty,           // tid
1697   };
1698   auto *FnTy =
1699       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1700   return CGM.CreateRuntimeFunction(FnTy, Name);
1701 }
1702 
1703 llvm::FunctionCallee
1704 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1705   assert((IVSize == 32 || IVSize == 64) &&
1706          "IV size is not compatible with the omp runtime");
1707   StringRef Name =
1708       IVSize == 32
1709           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1710           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1711   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1712   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1713   llvm::Type *TypeParams[] = {
1714     getIdentTyPointerTy(),                     // loc
1715     CGM.Int32Ty,                               // tid
1716     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1717     PtrTy,                                     // p_lower
1718     PtrTy,                                     // p_upper
1719     PtrTy                                      // p_stride
1720   };
1721   auto *FnTy =
1722       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1723   return CGM.CreateRuntimeFunction(FnTy, Name);
1724 }
1725 
1726 /// Obtain information that uniquely identifies a target entry. This
1727 /// consists of the file and device IDs as well as line number associated with
1728 /// the relevant entry source location.
1729 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1730                                      unsigned &DeviceID, unsigned &FileID,
1731                                      unsigned &LineNum) {
1732   SourceManager &SM = C.getSourceManager();
1733 
1734   // The loc should be always valid and have a file ID (the user cannot use
1735   // #pragma directives in macros)
1736 
1737   assert(Loc.isValid() && "Source location is expected to be always valid.");
1738 
1739   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1740   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1741 
1742   llvm::sys::fs::UniqueID ID;
1743   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1744     SM.getDiagnostics().Report(diag::err_cannot_open_file)
1745         << PLoc.getFilename() << EC.message();
1746 
1747   DeviceID = ID.getDevice();
1748   FileID = ID.getFile();
1749   LineNum = PLoc.getLine();
1750 }
1751 
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  // For "declare target link" variables (and "to" variables when unified
  // shared memory is required) returns the address of an indirection pointer
  // ("..._decl_tgt_ref_ptr") instead of the variable itself; otherwise
  // returns an invalid address.
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        // Internal-linkage variables get a file-ID suffix so the pointer
        // name is unique across translation units.
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    // Create the pointer global only once per module.
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // On the host, the pointer is initialized to the host variable's
      // address; on the device it is left for the runtime to fill in.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
1790 
1791 llvm::Constant *
1792 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1793   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1794          !CGM.getContext().getTargetInfo().isTLSSupported());
1795   // Lookup the entry, lazily creating it if necessary.
1796   std::string Suffix = getName({"cache", ""});
1797   return getOrCreateInternalVariable(
1798       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1799 }
1800 
1801 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1802                                                 const VarDecl *VD,
1803                                                 Address VDAddr,
1804                                                 SourceLocation Loc) {
1805   if (CGM.getLangOpts().OpenMPUseTLS &&
1806       CGM.getContext().getTargetInfo().isTLSSupported())
1807     return VDAddr;
1808 
1809   llvm::Type *VarTy = VDAddr.getElementType();
1810   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1811                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1812                                                        CGM.Int8PtrTy),
1813                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1814                          getOrCreateThreadPrivateCache(VD)};
1815   return Address(CGF.EmitRuntimeCall(
1816                      OMPBuilder.getOrCreateRuntimeFunction(
1817                          CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1818                      Args),
1819                  VDAddr.getAlignment());
1820 }
1821 
1822 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1823     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1824     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1825   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1826   // library.
1827   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1828   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1829                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1830                       OMPLoc);
1831   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1832   // to register constructor/destructor for variable.
1833   llvm::Value *Args[] = {
1834       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1835       Ctor, CopyCtor, Dtor};
1836   CGF.EmitRuntimeCall(
1837       OMPBuilder.getOrCreateRuntimeFunction(
1838           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1839       Args);
1840 }
1841 
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // Emits ctor/dtor helpers for a threadprivate variable and registers them
  // with the runtime. Returns an init function when one had to be created
  // (i.e. when called outside any function), nullptr otherwise.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Emit the helpers only once per variable (tracked by mangled name).
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // The ctor receives a void* pointing at the thread's copy.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      // Reinterpret the incoming void* as a pointer to the variable's type
      // and run the initializer into it.
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The ctor returns the same pointer it received.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // The dtor receives a void* pointing at the thread's copy.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      // No ctor needed: pass a typed null so the register call still type-checks.
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      // No dtor needed: pass a typed null so the register call still type-checks.
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // Called at file scope: wrap the registration in a dedicated init
      // function and return it so the caller can schedule it as a global
      // initializer.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Called inside a function: register inline in the current function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1961 
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // Emits (and registers as offload entries) the ctor/dtor helpers for a
  // declare-target variable. Returns true when compiling for the device,
  // false otherwise.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // Link variables (and "to" under unified shared memory) are accessed via a
  // reference pointer instead; no ctor/dtor entries are needed here.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Emit the helpers only once per variable (tracked by mangled name).
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the ctor alive even though nothing references it directly.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // On the host, only a placeholder global is needed; it serves as the
      // unique ID matching the device-side entry.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the dtor alive even though nothing references it directly.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host-side placeholder global acting as the matching entry ID.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2076 
/// Return the address of a compiler-generated ("artificial") threadprivate
/// variable of type \p VarType, uniqued by \p Name. If the target supports
/// TLS, the backing global is simply marked thread_local; otherwise the
/// address of the per-thread copy is obtained from the OpenMP runtime via
/// __kmpc_threadprivate_cached.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  // The backing global is created once and reused across calls with the same
  // name (see getOrCreateInternalVariable).
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  // Fast path: native TLS. Mark the global thread_local and use it directly.
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  // Runtime path: __kmpc_threadprivate_cached(loc, gtid, data, size, cache)
  // returns the address of this thread's copy; the "cache" argument is a
  // dedicated internal global used by the runtime to memoize lookups.
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  // Cast the runtime's void* result back to a pointer to the variable's type.
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}
2107 
2108 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2109                                    const RegionCodeGenTy &ThenGen,
2110                                    const RegionCodeGenTy &ElseGen) {
2111   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2112 
2113   // If the condition constant folds and can be elided, try to avoid emitting
2114   // the condition and the dead arm of the if/else.
2115   bool CondConstant;
2116   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2117     if (CondConstant)
2118       ThenGen(CGF);
2119     else
2120       ElseGen(CGF);
2121     return;
2122   }
2123 
2124   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2125   // emit the conditional branch.
2126   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2127   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2128   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2129   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2130 
2131   // Emit the 'then' code.
2132   CGF.EmitBlock(ThenBlock);
2133   ThenGen(CGF);
2134   CGF.EmitBranch(ContBlock);
2135   // Emit the 'else' code if present.
2136   // There is no need to emit line number for unconditional branch.
2137   (void)ApplyDebugLocation::CreateEmpty(CGF);
2138   CGF.EmitBlock(ElseBlock);
2139   ElseGen(CGF);
2140   // There is no need to emit line number for unconditional branch.
2141   (void)ApplyDebugLocation::CreateEmpty(CGF);
2142   CGF.EmitBranch(ContBlock);
2143   // Emit the continuation block for code after the if.
2144   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2145 }
2146 
/// Emit a call that runs \p OutlinedFn as an OpenMP parallel region. When
/// \p IfCond is null (or folds to true) the region is forked via
/// __kmpc_fork_call; when the 'if' clause evaluates to false at run time,
/// the region is executed serially on the encountering thread, bracketed by
/// __kmpc_serialized_parallel / __kmpc_end_serialized_parallel.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  // Parallel path: fork the team and pass the captured variables through as
  // trailing varargs of __kmpc_fork_call.
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // Serialized path: tell the runtime the region is serialized, call the
  // outlined function directly on this thread, then end the serialization.
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  // With an 'if' clause, branch between the two code paths at run time;
  // otherwise emit only the forking path.
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2208 
2209 // If we're inside an (outlined) parallel region, use the region info's
2210 // thread-ID variable (it is passed in a first argument of the outlined function
2211 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2212 // regular serial code region, get thread ID by calling kmp_int32
2213 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2214 // return the address of that temp.
2215 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2216                                              SourceLocation Loc) {
2217   if (auto *OMPRegionInfo =
2218           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2219     if (OMPRegionInfo->getThreadIDVariable())
2220       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2221 
2222   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2223   QualType Int32Ty =
2224       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2225   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2226   CGF.EmitStoreOfScalar(ThreadID,
2227                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2228 
2229   return ThreadIDTemp;
2230 }
2231 
2232 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
2233     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2234   SmallString<256> Buffer;
2235   llvm::raw_svector_ostream Out(Buffer);
2236   Out << Name;
2237   StringRef RuntimeName = Out.str();
2238   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2239   if (Elem.second) {
2240     assert(Elem.second->getType()->getPointerElementType() == Ty &&
2241            "OMP internal variable has different type than requested");
2242     return &*Elem.second;
2243   }
2244 
2245   return Elem.second = new llvm::GlobalVariable(
2246              CGM.getModule(), Ty, /*IsConstant*/ false,
2247              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2248              Elem.first(), /*InsertBefore=*/nullptr,
2249              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2250 }
2251 
2252 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2253   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2254   std::string Name = getName({Prefix, "var"});
2255   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2256 }
2257 
2258 namespace {
2259 /// Common pre(post)-action for different OpenMP constructs.
2260 class CommonActionTy final : public PrePostActionTy {
2261   llvm::FunctionCallee EnterCallee;
2262   ArrayRef<llvm::Value *> EnterArgs;
2263   llvm::FunctionCallee ExitCallee;
2264   ArrayRef<llvm::Value *> ExitArgs;
2265   bool Conditional;
2266   llvm::BasicBlock *ContBlock = nullptr;
2267 
2268 public:
2269   CommonActionTy(llvm::FunctionCallee EnterCallee,
2270                  ArrayRef<llvm::Value *> EnterArgs,
2271                  llvm::FunctionCallee ExitCallee,
2272                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2273       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2274         ExitArgs(ExitArgs), Conditional(Conditional) {}
2275   void Enter(CodeGenFunction &CGF) override {
2276     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2277     if (Conditional) {
2278       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2279       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2280       ContBlock = CGF.createBasicBlock("omp_if.end");
2281       // Generate the branch (If-stmt)
2282       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2283       CGF.EmitBlock(ThenBlock);
2284     }
2285   }
2286   void Done(CodeGenFunction &CGF) {
2287     // Emit the rest of blocks/branches
2288     CGF.EmitBranch(ContBlock);
2289     CGF.EmitBlock(ContBlock, true);
2290   }
2291   void Exit(CodeGenFunction &CGF) override {
2292     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2293   }
2294 };
2295 } // anonymous namespace
2296 
2297 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2298                                          StringRef CriticalName,
2299                                          const RegionCodeGenTy &CriticalOpGen,
2300                                          SourceLocation Loc, const Expr *Hint) {
2301   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2302   // CriticalOpGen();
2303   // __kmpc_end_critical(ident_t *, gtid, Lock);
2304   // Prepare arguments and build a call to __kmpc_critical
2305   if (!CGF.HaveInsertPoint())
2306     return;
2307   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2308                          getCriticalRegionLock(CriticalName)};
2309   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2310                                                 std::end(Args));
2311   if (Hint) {
2312     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2313         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2314   }
2315   CommonActionTy Action(
2316       OMPBuilder.getOrCreateRuntimeFunction(
2317           CGM.getModule(),
2318           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2319       EnterArgs,
2320       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2321                                             OMPRTL___kmpc_end_critical),
2322       Args);
2323   CriticalOpGen.setAction(Action);
2324   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2325 }
2326 
2327 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2328                                        const RegionCodeGenTy &MasterOpGen,
2329                                        SourceLocation Loc) {
2330   if (!CGF.HaveInsertPoint())
2331     return;
2332   // if(__kmpc_master(ident_t *, gtid)) {
2333   //   MasterOpGen();
2334   //   __kmpc_end_master(ident_t *, gtid);
2335   // }
2336   // Prepare arguments and build a call to __kmpc_master
2337   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2338   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2339                             CGM.getModule(), OMPRTL___kmpc_master),
2340                         Args,
2341                         OMPBuilder.getOrCreateRuntimeFunction(
2342                             CGM.getModule(), OMPRTL___kmpc_end_master),
2343                         Args,
2344                         /*Conditional=*/true);
2345   MasterOpGen.setAction(Action);
2346   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2347   Action.Done(CGF);
2348 }
2349 
2350 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2351                                         SourceLocation Loc) {
2352   if (!CGF.HaveInsertPoint())
2353     return;
2354   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2355     OMPBuilder.CreateTaskyield(CGF.Builder);
2356   } else {
2357     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2358     llvm::Value *Args[] = {
2359         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2360         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2361     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2362                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2363                         Args);
2364   }
2365 
2366   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2367     Region->emitUntiedSwitch(CGF);
2368 }
2369 
2370 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2371                                           const RegionCodeGenTy &TaskgroupOpGen,
2372                                           SourceLocation Loc) {
2373   if (!CGF.HaveInsertPoint())
2374     return;
2375   // __kmpc_taskgroup(ident_t *, gtid);
2376   // TaskgroupOpGen();
2377   // __kmpc_end_taskgroup(ident_t *, gtid);
2378   // Prepare arguments and build a call to __kmpc_taskgroup
2379   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2380   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2381                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2382                         Args,
2383                         OMPBuilder.getOrCreateRuntimeFunction(
2384                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2385                         Args);
2386   TaskgroupOpGen.setAction(Action);
2387   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2388 }
2389 
2390 /// Given an array of pointers to variables, project the address of a
2391 /// given variable.
2392 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2393                                       unsigned Index, const VarDecl *Var) {
2394   // Pull out the pointer to the variable.
2395   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2396   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2397 
2398   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2399   Addr = CGF.Builder.CreateElementBitCast(
2400       Addr, CGF.ConvertTypeForMem(Var->getType()));
2401   return Addr;
2402 }
2403 
/// Emit the helper "void copy_func(void *LHSArg, void *RHSArg)" used by
/// __kmpc_copyprivate. Both arguments are pointers to arrays of void* (of
/// type \p ArgsType) holding the addresses of the copyprivate variables;
/// the helper assigns each RHS element to the corresponding LHS element via
/// the user-provided assignment expressions in \p AssignmentOps.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  // Internal linkage: the helper is only referenced from this TU's
  // __kmpc_copyprivate call.
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    // Each assignment is emitted with the original variable's type so that
    // user-defined copy assignment operators are honored.
    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
2457 
/// Emit an OpenMP 'single' region. When copyprivate variables are present,
/// the single thread records did_it=1 and every thread then calls
/// __kmpc_copyprivate to broadcast the single thread's values.
/// NOTE(review): SrcExprs/DstExprs are forwarded below into
/// emitCopyprivateCopyFunction's DestExprs/SrcExprs parameters respectively;
/// the naming looks swapped relative to the callee — confirm against this
/// function's callers before renaming anything.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // The did_it flag is only needed when there is something to broadcast.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // Conditional: only the thread that wins __kmpc_single runs the body.
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;  (still inside the single-thread guard)
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  // Close the conditional region; code below runs on every thread.
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    // Fill the list with the address of each copyprivate variable, cast to
    // void*.
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
2545 
2546 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2547                                         const RegionCodeGenTy &OrderedOpGen,
2548                                         SourceLocation Loc, bool IsThreads) {
2549   if (!CGF.HaveInsertPoint())
2550     return;
2551   // __kmpc_ordered(ident_t *, gtid);
2552   // OrderedOpGen();
2553   // __kmpc_end_ordered(ident_t *, gtid);
2554   // Prepare arguments and build a call to __kmpc_ordered
2555   if (IsThreads) {
2556     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2557     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2558                               CGM.getModule(), OMPRTL___kmpc_ordered),
2559                           Args,
2560                           OMPBuilder.getOrCreateRuntimeFunction(
2561                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2562                           Args);
2563     OrderedOpGen.setAction(Action);
2564     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2565     return;
2566   }
2567   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2568 }
2569 
2570 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2571   unsigned Flags;
2572   if (Kind == OMPD_for)
2573     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2574   else if (Kind == OMPD_sections)
2575     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2576   else if (Kind == OMPD_single)
2577     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2578   else if (Kind == OMPD_barrier)
2579     Flags = OMP_IDENT_BARRIER_EXPL;
2580   else
2581     Flags = OMP_IDENT_BARRIER_IMPL;
2582   return Flags;
2583 }
2584 
2585 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2586     CodeGenFunction &CGF, const OMPLoopDirective &S,
2587     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2588   // Check if the loop directive is actually a doacross loop directive. In this
2589   // case choose static, 1 schedule.
2590   if (llvm::any_of(
2591           S.getClausesOfKind<OMPOrderedClause>(),
2592           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2593     ScheduleKind = OMPC_SCHEDULE_static;
2594     // Chunk size is 1 in this case.
2595     llvm::APInt ChunkSize(32, 1);
2596     ChunkExpr = IntegerLiteral::Create(
2597         CGF.getContext(), ChunkSize,
2598         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2599         SourceLocation());
2600   }
2601 }
2602 
/// Emit a barrier for directive \p Kind. Inside a cancellable region (unless
/// \p ForceSimpleCall) the cancellation-aware __kmpc_cancel_barrier is used,
/// and with \p EmitChecks its result is tested to branch out of the
/// construct; otherwise a plain __kmpc_barrier is emitted.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.CreateBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      // Cancellation-aware barrier: a nonzero return means cancellation was
      // requested somewhere in the team.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        // Branch through cleanups to the region's cancellation destination.
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  // Default: plain barrier, no cancellation checks.
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}
2652 
2653 /// Map the OpenMP loop schedule to the runtime enumeration.
2654 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2655                                           bool Chunked, bool Ordered) {
2656   switch (ScheduleKind) {
2657   case OMPC_SCHEDULE_static:
2658     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2659                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2660   case OMPC_SCHEDULE_dynamic:
2661     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2662   case OMPC_SCHEDULE_guided:
2663     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2664   case OMPC_SCHEDULE_runtime:
2665     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2666   case OMPC_SCHEDULE_auto:
2667     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2668   case OMPC_SCHEDULE_unknown:
2669     assert(!Chunked && "chunk was specified but schedule kind not known");
2670     return Ordered ? OMP_ord_static : OMP_sch_static;
2671   }
2672   llvm_unreachable("Unexpected runtime schedule");
2673 }
2674 
2675 /// Map the OpenMP distribute schedule to the runtime enumeration.
2676 static OpenMPSchedType
2677 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2678   // only static is allowed for dist_schedule
2679   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2680 }
2681 
2682 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2683                                          bool Chunked) const {
2684   OpenMPSchedType Schedule =
2685       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2686   return Schedule == OMP_sch_static;
2687 }
2688 
2689 bool CGOpenMPRuntime::isStaticNonchunked(
2690     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2691   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2692   return Schedule == OMP_dist_sch_static;
2693 }
2694 
2695 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2696                                       bool Chunked) const {
2697   OpenMPSchedType Schedule =
2698       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2699   return Schedule == OMP_sch_static_chunked;
2700 }
2701 
2702 bool CGOpenMPRuntime::isStaticChunked(
2703     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2704   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2705   return Schedule == OMP_dist_sch_static_chunked;
2706 }
2707 
2708 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2709   OpenMPSchedType Schedule =
2710       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2711   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2712   return Schedule != OMP_sch_static;
2713 }
2714 
2715 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2716                                   OpenMPScheduleClauseModifier M1,
2717                                   OpenMPScheduleClauseModifier M2) {
2718   int Modifier = 0;
2719   switch (M1) {
2720   case OMPC_SCHEDULE_MODIFIER_monotonic:
2721     Modifier = OMP_sch_modifier_monotonic;
2722     break;
2723   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2724     Modifier = OMP_sch_modifier_nonmonotonic;
2725     break;
2726   case OMPC_SCHEDULE_MODIFIER_simd:
2727     if (Schedule == OMP_sch_static_chunked)
2728       Schedule = OMP_sch_static_balanced_chunked;
2729     break;
2730   case OMPC_SCHEDULE_MODIFIER_last:
2731   case OMPC_SCHEDULE_MODIFIER_unknown:
2732     break;
2733   }
2734   switch (M2) {
2735   case OMPC_SCHEDULE_MODIFIER_monotonic:
2736     Modifier = OMP_sch_modifier_monotonic;
2737     break;
2738   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2739     Modifier = OMP_sch_modifier_nonmonotonic;
2740     break;
2741   case OMPC_SCHEDULE_MODIFIER_simd:
2742     if (Schedule == OMP_sch_static_chunked)
2743       Schedule = OMP_sch_static_balanced_chunked;
2744     break;
2745   case OMPC_SCHEDULE_MODIFIER_last:
2746   case OMPC_SCHEDULE_MODIFIER_unknown:
2747     break;
2748   }
2749   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2750   // If the static schedule kind is specified or if the ordered clause is
2751   // specified, and if the nonmonotonic modifier is not specified, the effect is
2752   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2753   // modifier is specified, the effect is as if the nonmonotonic modifier is
2754   // specified.
2755   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2756     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2757           Schedule == OMP_sch_static_balanced_chunked ||
2758           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2759           Schedule == OMP_dist_sch_static_chunked ||
2760           Schedule == OMP_dist_sch_static))
2761       Modifier = OMP_sch_modifier_nonmonotonic;
2762   }
2763   return Schedule | Modifier;
2764 }
2765 
/// Emit a call to __kmpc_dispatch_init_<4|8>[u] that sets up a dynamically
/// scheduled worksharing loop.
///
/// \param IVSize Bit width (32 or 64) of the loop iteration variable.
/// \param IVSigned Whether the iteration variable is signed.
/// \param Ordered True if the loop has an 'ordered' clause.
/// \param DispatchValues Loop bounds and the optional chunk expression.
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  // Nothing to emit when the current insertion point is unreachable.
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  // Unordered static schedules are handled by emitForStaticInit instead.
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}
2798 
/// Emit the call to a __kmpc_for_static_init_<4|8>[u] entry point that sets
/// up a statically scheduled worksharing or distribute region.
///
/// \param UpdateLocation ident_t* describing the construct's location.
/// \param ThreadId Global thread id of the encountering thread.
/// \param ForStaticInitFunction Runtime entry point matching the IV width and
///        signedness.
/// \param Schedule Static runtime schedule kind (asserted below).
/// \param M1 First schedule modifier from the 'schedule' clause.
/// \param M2 Second schedule modifier from the 'schedule' clause.
/// \param Values Addresses of the lastiter/lower/upper/stride variables plus
///        IV width/signedness and the optional chunk expression.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  // Ordered loops never use the static-init entry points.
  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    // The presence/absence of a chunk must agree with the schedule kind the
    // caller computed from the clause.
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}
2847 
/// Emit initialization for a statically scheduled worksharing (loop or
/// sections) construct: compute the runtime schedule from the clause and
/// forward to emitForStaticInitCall.
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert(isOpenMPWorksharingDirective(DKind) &&
         "Expected loop-based or sections-based directive.");
  // The ident_t flags tell the runtime whether this is a loop or a sections
  // construct.
  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
                                             isOpenMPLoopDirective(DKind)
                                                 ? OMP_IDENT_WORK_LOOP
                                                 : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  // Attach an artificial debug location so the runtime call is not blamed on
  // user code.
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}
2868 
/// Emit initialization for a 'distribute' construct. Distribute regions are
/// always statically scheduled and carry no schedule modifiers, so both
/// modifier slots are passed as 'unknown'.
void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}
2884 
/// Emit the __kmpc_for_static_fini call that closes a statically scheduled
/// worksharing or distribute region. The ident_t flags mirror the ones used
/// at initialization time.
void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ? OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  // Attach an artificial debug location so the runtime call is not blamed on
  // user code.
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_for_static_fini),
                      Args);
}
2904 
/// Signal the end of one iteration in an ordered, dynamically scheduled loop
/// by calling the __kmpc_dispatch_fini entry point matching the IV width and
/// signedness.
void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
}
2915 
/// Request the next chunk of a dynamically scheduled loop via
/// __kmpc_dispatch_next_<4|8>[u].
///
/// \param IL Address of the is-last-iteration flag, updated by the runtime.
/// \param LB Address of the lower bound, updated by the runtime.
/// \param UB Address of the upper bound, updated by the runtime.
/// \param ST Address of the stride, updated by the runtime.
/// \returns The runtime's i32 result converted to a boolean: true while more
///          chunks remain to be executed.
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}
2939 
/// Emit a __kmpc_push_num_threads call implementing the 'num_threads' clause
/// for the next parallel region.
void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      // The runtime expects a kmp_int32; truncate/extend the clause value.
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_threads),
                      Args);
}
2953 
/// Emit a __kmpc_push_proc_bind call implementing the 'proc_bind' clause for
/// the next parallel region.
void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         ProcBindKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
                      Args);
}
2968 
/// Emit an OpenMP flush. The variable list and atomic ordering are currently
/// unused by this lowering; a full-memory __kmpc_flush is emitted instead.
void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc, llvm::AtomicOrdering AO) {
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    // Delegate to the shared OpenMPIRBuilder when it is enabled.
    OMPBuilder.CreateFlush(CGF.Builder);
  } else {
    if (!CGF.HaveInsertPoint())
      return;
    // Build call void __kmpc_flush(ident_t *loc)
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_flush),
                        emitUpdateLocation(CGF, Loc));
  }
}
2982 
namespace {
/// Indexes of fields for type kmp_task_t.
///
/// NOTE(review): the enumerator order presumably mirrors the field layout of
/// the runtime's kmp_task_t (plus the taskloop extension fields) - confirm
/// against the OpenMP runtime headers before reordering.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
3008 
3009 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3010   return OffloadEntriesTargetRegion.empty() &&
3011          OffloadEntriesDeviceGlobalVar.empty();
3012 }
3013 
/// Initialize target region entry.
/// Creates a placeholder entry (null address/ID) at the given position in the
/// device/file/parent-function/line map; the real address and ID are filled
/// in later by registerTargetRegionEntryInfo.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                    StringRef ParentName, unsigned LineNum,
                                    unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
                                   OMPTargetRegionEntryTargetRegion);
  ++OffloadingEntriesNum;
}
3027 
/// Register a target region entry with its address, ID and flags. On the
/// device side the entry must have been initialized from the host IR
/// metadata beforehand; on the host side a fresh entry is created.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // A missing entry means the host and device compilations disagree about
    // which target regions exist; report it instead of asserting.
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Unable to find target region on line '%0' in the device code.");
      CGM.getDiags().Report(DiagID) << LineNum;
      return;
    }
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    assert(Entry.isValid() && "Entry not initialized!");
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}
3055 
3056 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3057     unsigned DeviceID, unsigned FileID, StringRef ParentName,
3058     unsigned LineNum) const {
3059   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3060   if (PerDevice == OffloadEntriesTargetRegion.end())
3061     return false;
3062   auto PerFile = PerDevice->second.find(FileID);
3063   if (PerFile == PerDevice->second.end())
3064     return false;
3065   auto PerParentName = PerFile->second.find(ParentName);
3066   if (PerParentName == PerFile->second.end())
3067     return false;
3068   auto PerLine = PerParentName->second.find(LineNum);
3069   if (PerLine == PerParentName->second.end())
3070     return false;
3071   // Fail if this entry is already registered.
3072   if (PerLine->second.getAddress() || PerLine->second.getID())
3073     return false;
3074   return true;
3075 }
3076 
3077 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3078     const OffloadTargetRegionEntryInfoActTy &Action) {
3079   // Scan all target region entries and perform the provided action.
3080   for (const auto &D : OffloadEntriesTargetRegion)
3081     for (const auto &F : D.second)
3082       for (const auto &P : F.second)
3083         for (const auto &L : P.second)
3084           Action(D.first, F.first, P.first(), L.first, L.second);
3085 }
3086 
/// Initialize a device global variable entry from host IR metadata. Creates
/// a placeholder (no address/size yet) keyed by the mangled name; the address
/// is filled in later by registerDeviceGlobalVarEntryInfo.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}
3097 
/// Register a device global variable entry with its address, size, flags and
/// linkage. On the device side the entry was pre-initialized from metadata;
/// on the host side it may be created here. A zero-sized registration can be
/// completed later when the actual definition (and its size) is seen.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    assert(Entry.isValid() && Entry.getFlags() == Flags &&
           "Entry not initialized!");
    assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
           "Resetting with the new address.");
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      // Entry already registered; only fill in the size/linkage if the first
      // registration was for a declaration (size zero).
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
             "Resetting with the new address.");
      // Same declaration-then-definition completion as on the device side.
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}
3137 
3138 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3139     actOnDeviceGlobalVarEntriesInfo(
3140         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3141   // Scan all target region entries and perform the provided action.
3142   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3143     Action(E.getKey(), E.getValue());
3144 }
3145 
/// Materialize one __tgt_offload_entry global for the offload registration
/// tables. The entry references the target address/ID, an internal string
/// with the symbol name, the size, and the flags, and is placed in the
/// 'omp_offloading_entries' section so the linker collects all entries into
/// one array.
void CGOpenMPRuntime::createOffloadEntry(
    llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
    llvm::GlobalValue::LinkageTypes Linkage) {
  StringRef Name = Addr->getName();
  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();

  // Create constant string with the name.
  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);

  std::string StringName = getName({"omp_offloading", "entry_name"});
  auto *Str = new llvm::GlobalVariable(
      M, StrPtrInit->getType(), /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  // Field order must match the __tgt_offload_entry layout declared in
  // getTgtOffloadEntryQTy().
  llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
                            llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
                            llvm::ConstantInt::get(CGM.SizeTy, Size),
                            llvm::ConstantInt::get(CGM.Int32Ty, Flags),
                            llvm::ConstantInt::get(CGM.Int32Ty, 0)};
  std::string EntryName = getName({"omp_offloading", "entry", ""});
  llvm::GlobalVariable *Entry = createGlobalStruct(
      CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
      Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);

  // The entry has to be created in the section the linker expects it to be.
  Entry->setSection("omp_offloading_entries");
}
3175 
/// Emit the offload entry globals and the 'omp_offload.info' metadata that
/// the device-side compilation reads back via loadOffloadInfoMetadata().
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for function that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries are stored at the index given by their creation order so the
  // emitted table is deterministic.
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Recover a SourceLocation for diagnostics by matching the
        // device/file unique IDs against the files the source manager knows.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Now emit one __tgt_offload_entry global per collected entry, diagnosing
  // entries whose address/ID was never registered.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        // Under unified shared memory the device does not need entries for
        // 'to' variables.
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
3349 
/// Loads all the offload entries information from the host IR
/// metadata.
/// Parses the host IR bitcode named by -fopenmp-host-ir-file-path and
/// replays its 'omp_offload.info' metadata into the entry tables, so the
/// device compilation emits entries in the same order as the host.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the bitcode into a throwaway context; only the metadata is read.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Operand layout matches the node built by
    // createOffloadEntriesAndInfoMetadata(); operand 0 is the entry kind.
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
3418 
3419 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3420   if (!KmpRoutineEntryPtrTy) {
3421     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3422     ASTContext &C = CGM.getContext();
3423     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3424     FunctionProtoType::ExtProtoInfo EPI;
3425     KmpRoutineEntryPtrQTy = C.getPointerType(
3426         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3427     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3428   }
3429 }
3430 
3431 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3432   // Make sure the type of the entry is already created. This is the type we
3433   // have to create:
3434   // struct __tgt_offload_entry{
3435   //   void      *addr;       // Pointer to the offload entry info.
3436   //                          // (function or global)
3437   //   char      *name;       // Name of the function or global.
3438   //   size_t     size;       // Size of the entry info (0 if it a function).
3439   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3440   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3441   // };
3442   if (TgtOffloadEntryQTy.isNull()) {
3443     ASTContext &C = CGM.getContext();
3444     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3445     RD->startDefinition();
3446     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3447     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3448     addFieldToRecordDecl(C, RD, C.getSizeType());
3449     addFieldToRecordDecl(
3450         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3451     addFieldToRecordDecl(
3452         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3453     RD->completeDefinition();
3454     RD->addAttr(PackedAttr::CreateImplicit(C));
3455     TgtOffloadEntryQTy = C.getRecordType(RD);
3456   }
3457   return TgtOffloadEntryQTy;
3458 }
3459 
3460 namespace {
3461 struct PrivateHelpersTy {
3462   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3463                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3464       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3465         PrivateElemInit(PrivateElemInit) {}
3466   const Expr *OriginalRef = nullptr;
3467   const VarDecl *Original = nullptr;
3468   const VarDecl *PrivateCopy = nullptr;
3469   const VarDecl *PrivateElemInit = nullptr;
3470 };
3471 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3472 } // anonymous namespace
3473 
3474 static RecordDecl *
3475 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3476   if (!Privates.empty()) {
3477     ASTContext &C = CGM.getContext();
3478     // Build struct .kmp_privates_t. {
3479     //         /*  private vars  */
3480     //       };
3481     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3482     RD->startDefinition();
3483     for (const auto &Pair : Privates) {
3484       const VarDecl *VD = Pair.second.Original;
3485       QualType Type = VD->getType().getNonReferenceType();
3486       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3487       if (VD->hasAttrs()) {
3488         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3489              E(VD->getAttrs().end());
3490              I != E; ++I)
3491           FD->addAttr(*I);
3492       }
3493     }
3494     RD->completeDefinition();
3495     return RD;
3496   }
3497   return nullptr;
3498 }
3499 
3500 static RecordDecl *
3501 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3502                          QualType KmpInt32Ty,
3503                          QualType KmpRoutineEntryPointerQTy) {
3504   ASTContext &C = CGM.getContext();
3505   // Build struct kmp_task_t {
3506   //         void *              shareds;
3507   //         kmp_routine_entry_t routine;
3508   //         kmp_int32           part_id;
3509   //         kmp_cmplrdata_t data1;
3510   //         kmp_cmplrdata_t data2;
3511   // For taskloops additional fields:
3512   //         kmp_uint64          lb;
3513   //         kmp_uint64          ub;
3514   //         kmp_int64           st;
3515   //         kmp_int32           liter;
3516   //         void *              reductions;
3517   //       };
3518   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3519   UD->startDefinition();
3520   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3521   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3522   UD->completeDefinition();
3523   QualType KmpCmplrdataTy = C.getRecordType(UD);
3524   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3525   RD->startDefinition();
3526   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3527   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3528   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3529   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3530   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3531   if (isOpenMPTaskLoopDirective(Kind)) {
3532     QualType KmpUInt64Ty =
3533         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3534     QualType KmpInt64Ty =
3535         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3536     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3537     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3538     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3539     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3540     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3541   }
3542   RD->completeDefinition();
3543   return RD;
3544 }
3545 
3546 static RecordDecl *
3547 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3548                                      ArrayRef<PrivateDataTy> Privates) {
3549   ASTContext &C = CGM.getContext();
3550   // Build struct kmp_task_t_with_privates {
3551   //         kmp_task_t task_data;
3552   //         .kmp_privates_t. privates;
3553   //       };
3554   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3555   RD->startDefinition();
3556   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3557   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3558     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3559   RD->completeDefinition();
3560   return RD;
3561 }
3562 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // The proxy's signature matches kmp_routine_entry_t:
  //   kmp_int32 (*)(kmp_int32 gtid, kmp_task_t *tt).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // The first field of kmp_task_t_with_privates is the kmp_task_t task_data.
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address, not by value.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  // Load the shareds pointer and cast it to the type the task function
  // expects.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // Pass the address of the privates block if one exists, else a null void*.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  // Taskloop task functions take the loop bounds, stride, last-iteration flag
  // and reductions pointer as extra arguments, loaded from the task record.
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The runtime expects the entry to return 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
3677 
/// Emit a destructor thunk with the kmp_routine_entry_t-style signature
/// (gtid, task pointer) that runs the destructors for every field of the
/// task's privates record that needs destruction.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // The privates record is the second field of kmp_task_t_with_privates;
  // callers only emit this function when that field exists.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  // Push a destroy cleanup for each privates field whose type requires
  // destruction.
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
3726 
3727 /// Emit a privates mapping function for correct handling of private and
3728 /// firstprivate variables.
3729 /// \code
3730 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3731 /// **noalias priv1,...,  <tyn> **noalias privn) {
3732 ///   *priv1 = &.privates.priv1;
3733 ///   ...;
3734 ///   *privn = &.privates.privn;
3735 /// }
3736 /// \endcode
3737 static llvm::Value *
3738 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3739                                ArrayRef<const Expr *> PrivateVars,
3740                                ArrayRef<const Expr *> FirstprivateVars,
3741                                ArrayRef<const Expr *> LastprivateVars,
3742                                QualType PrivatesQTy,
3743                                ArrayRef<PrivateDataTy> Privates) {
3744   ASTContext &C = CGM.getContext();
3745   FunctionArgList Args;
3746   ImplicitParamDecl TaskPrivatesArg(
3747       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3748       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3749       ImplicitParamDecl::Other);
3750   Args.push_back(&TaskPrivatesArg);
3751   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
3752   unsigned Counter = 1;
3753   for (const Expr *E : PrivateVars) {
3754     Args.push_back(ImplicitParamDecl::Create(
3755         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3756         C.getPointerType(C.getPointerType(E->getType()))
3757             .withConst()
3758             .withRestrict(),
3759         ImplicitParamDecl::Other));
3760     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3761     PrivateVarsPos[VD] = Counter;
3762     ++Counter;
3763   }
3764   for (const Expr *E : FirstprivateVars) {
3765     Args.push_back(ImplicitParamDecl::Create(
3766         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3767         C.getPointerType(C.getPointerType(E->getType()))
3768             .withConst()
3769             .withRestrict(),
3770         ImplicitParamDecl::Other));
3771     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3772     PrivateVarsPos[VD] = Counter;
3773     ++Counter;
3774   }
3775   for (const Expr *E : LastprivateVars) {
3776     Args.push_back(ImplicitParamDecl::Create(
3777         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3778         C.getPointerType(C.getPointerType(E->getType()))
3779             .withConst()
3780             .withRestrict(),
3781         ImplicitParamDecl::Other));
3782     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3783     PrivateVarsPos[VD] = Counter;
3784     ++Counter;
3785   }
3786   const auto &TaskPrivatesMapFnInfo =
3787       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3788   llvm::FunctionType *TaskPrivatesMapTy =
3789       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3790   std::string Name =
3791       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3792   auto *TaskPrivatesMap = llvm::Function::Create(
3793       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3794       &CGM.getModule());
3795   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3796                                     TaskPrivatesMapFnInfo);
3797   if (CGM.getLangOpts().Optimize) {
3798     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3799     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3800     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3801   }
3802   CodeGenFunction CGF(CGM);
3803   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3804                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3805 
3806   // *privi = &.privates.privi;
3807   LValue Base = CGF.EmitLoadOfPointerLValue(
3808       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3809       TaskPrivatesArg.getType()->castAs<PointerType>());
3810   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3811   Counter = 0;
3812   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3813     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3814     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3815     LValue RefLVal =
3816         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3817     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3818         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3819     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3820     ++Counter;
3821   }
3822   CGF.FinishFunction();
3823   return TaskPrivatesMap;
3824 }
3825 
/// Emit initialization for private variables in task-based directives.
/// \param KmpTaskSharedsPtr Pointer to the shareds block of the source task;
///        may be Address::invalid() when not needed.
/// \param TDBase Base lvalue of the kmp_task_t_with_privates instance being
///        initialized.
/// \param SharedsTy/SharedsPtrTy Type (and pointer type) of the captured
///        shareds record.
/// \param ForDup true when emitting inside the taskloop task_dup function,
///        false for the initial task construction.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates record is the second field of kmp_task_t_with_privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Walk the privates record fields in lockstep with the Privates array;
  // both were built from the same list, so the orders match.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the dup function (ForDup) only non-trivial constructor-based
    // initializers must be re-run; everything else was handled already.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        // Firstprivate: initialize the copy from the original (shared) value.
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // In the dup function read the source value out of the shareds
          // block of the source task.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
          // Inside a lambda or block, emit the reference through the
          // enclosing capture machinery.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Scalar/record: privatize the element variable to point at the
          // shared value, then run the initializer expression.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private: just run the declared initializer.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3942 
3943 /// Check if duplication function is required for taskloops.
3944 static bool checkInitIsRequired(CodeGenFunction &CGF,
3945                                 ArrayRef<PrivateDataTy> Privates) {
3946   bool InitRequired = false;
3947   for (const PrivateDataTy &Pair : Privates) {
3948     const VarDecl *VD = Pair.second.PrivateCopy;
3949     const Expr *Init = VD->getAnyInitializer();
3950     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3951                                     !CGF.isTrivialInitializer(Init));
3952     if (InitRequired)
3953       break;
3954   }
3955   return InitRequired;
3956 }
3957 
3958 
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  // Signature: void(kmp_task_t *dst, kmp_task_t *src, int lastpriv).
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  // Callers only emit this function when there are privates to copy.
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    // Load the shareds pointer out of the *source* task so firstprivate
    // copies are initialized from its values.
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
4037 
4038 /// Checks if destructor function is required to be generated.
4039 /// \return true if cleanups are required, false otherwise.
4040 static bool
4041 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
4042   bool NeedsCleanup = false;
4043   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4044   const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
4045   for (const FieldDecl *FD : PrivateRD->fields()) {
4046     NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
4047     if (NeedsCleanup)
4048       break;
4049   }
4050   return NeedsCleanup;
4051 }
4052 
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// The constructor privatizes the iterator variables and their counters and
/// emits the *header* of one loop per iterator (counter init, bound check,
/// branch to body, iterator update).  The destructor emits the matching loop
/// *tails* in reverse order (counter increment, back-branch to the continue
/// block, exit block).  As a result, any IR emitted while an instance of this
/// scope is alive lands inside the innermost generated loop body.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // One continue/exit jump destination per iterator; filled by the
  // constructor and consumed by the destructor.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    // Emit the upper bounds before privatization so they are evaluated in the
    // enclosing (non-private) scope.
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    // Emit one loop header per iterator, outermost first.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Pick the signed/unsigned comparison matching the counter type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    // Close the loops innermost-first, mirroring the constructor.
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
4131 
4132 static std::pair<llvm::Value *, llvm::Value *>
4133 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4134   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4135   llvm::Value *Addr;
4136   if (OASE) {
4137     const Expr *Base = OASE->getBase();
4138     Addr = CGF.EmitScalarExpr(Base);
4139   } else {
4140     Addr = CGF.EmitLValue(E).getPointer(CGF);
4141   }
4142   llvm::Value *SizeVal;
4143   QualType Ty = E->getType();
4144   if (OASE) {
4145     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4146     for (const Expr *SE : OASE->getDimensions()) {
4147       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4148       Sz = CGF.EmitScalarConversion(
4149           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4150       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4151     }
4152   } else if (const auto *ASE =
4153                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4154     LValue UpAddrLVal =
4155         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4156     llvm::Value *UpAddr =
4157         CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
4158     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4159     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4160     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4161   } else {
4162     SizeVal = CGF.getTypeSize(Ty);
4163   }
4164   return std::make_pair(Addr, SizeVal);
4165 }
4166 
4167 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4168 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4169   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4170   if (KmpTaskAffinityInfoTy.isNull()) {
4171     RecordDecl *KmpAffinityInfoRD =
4172         C.buildImplicitRecord("kmp_task_affinity_info_t");
4173     KmpAffinityInfoRD->startDefinition();
4174     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4175     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4176     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4177     KmpAffinityInfoRD->completeDefinition();
4178     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4179   }
4180 }
4181 
/// Emits the common setup for an OpenMP task-generating construct: collects
/// and sorts the private copies, builds the kmp_task_t-with-privates record
/// and the proxy entry function, calls __kmpc_omp_task_alloc (or the target
/// variant), copies shareds, initializes privates, and fills the destructor
/// and priority fields of the allocated task.
/// \return The allocated task plus auxiliary handles (entry proxy, typed base
/// lvalue, optional task-dup function) the caller needs to enqueue the task.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally record the element-init variable used to
  // construct the private copy from the original value.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Larger alignments first (stable within equal alignment), presumably to
  // minimize padding in the generated privates record.
  llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
    return L.first > R.first;
  });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  // Taskloop and plain task/target use different kmp_task_t layouts; each is
  // cached separately after first use.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  // TaskPrivatesMap is the helper that maps a task's privates record to the
  // individual private pointers; its type is taken from the 4th parameter of
  // the outlined task function.
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // The 'final' flag may be a runtime value (final(expr) clause), so it is
  // emitted as a select and OR'ed into the constant flags instead of being
  // folded in at compile time.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  // 'nowait' target tasks go through the target-specific allocator, which
  // additionally takes a device id.
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // Iterator-modified clauses contribute a runtime count (product of the
    // iterator upper bounds); plain clauses contribute a compile-time count.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime element count: emit a variable-length array of affinity
      // records sized total = NumAffinities + runtime count.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      OpaqueValueExpr OVE(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_RValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Compile-time element count: a constant-sized temporary is enough.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    // Iterator-modified items need a runtime position counter, seeded with
    // the number of plain items already written.
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      // The scope emits the iterator loop nest; the stores below land inside
      // the innermost loop body.
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops additionally get a task-duplication helper when lastprivate
    // state or nontrivial firstprivate init has to be carried over into
    // duplicated tasks.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4564 
namespace {
/// Dependence kind for RTL.
/// Values are the flag bits stored into the kmp_depend_info 'flags' field;
/// they are expected to match the runtime's encoding (see kmp.h in the
/// OpenMP runtime) — note DepInOut (0x3) is DepIn | the 'out' bit.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Fields ids in kmp_depend_info record.
/// Order must match the fields added in getDependTypes: base address, length
/// in bytes, dependence flags.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace
4575 
4576 /// Translates internal dependency kind into the runtime kind.
4577 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4578   RTLDependenceKindTy DepKind;
4579   switch (K) {
4580   case OMPC_DEPEND_in:
4581     DepKind = DepIn;
4582     break;
4583   // Out and InOut dependencies must use the same code.
4584   case OMPC_DEPEND_out:
4585   case OMPC_DEPEND_inout:
4586     DepKind = DepInOut;
4587     break;
4588   case OMPC_DEPEND_mutexinoutset:
4589     DepKind = DepMutexInOutSet;
4590     break;
4591   case OMPC_DEPEND_source:
4592   case OMPC_DEPEND_sink:
4593   case OMPC_DEPEND_depobj:
4594   case OMPC_DEPEND_unknown:
4595     llvm_unreachable("Unknown task dependence type");
4596   }
4597   return DepKind;
4598 }
4599 
4600 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4601 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4602                            QualType &FlagsTy) {
4603   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4604   if (KmpDependInfoTy.isNull()) {
4605     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4606     KmpDependInfoRD->startDefinition();
4607     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4608     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4609     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4610     KmpDependInfoRD->completeDefinition();
4611     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4612   }
4613 }
4614 
/// Extracts, from a depobj variable, the number of stored dependencies and an
/// lvalue addressing the first kmp_depend_info element of its array.
/// \return {NumDeps, Base} where NumDeps is the element count and Base points
/// at element 0 of the dependency array.
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  // The depobj variable holds a void* pointing at the first element of a
  // kmp_depend_info array; load it and retype to kmp_depend_info*.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // The element count is stashed in the record immediately *before* the first
  // element (index -1), in its base_addr field.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
      Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
4643 
/// Emits the kmp_depend_info records for one 'depend' clause into
/// \p DependenciesArray, starting at position \p Pos.
/// \param Pos either a compile-time counter (no iterator modifier: constant
/// GEPs, counter incremented in place) or an lvalue holding a runtime index
/// (iterator modifier: index loaded/stored around each element).
/// If Data.IteratorExpr is set, the stores are wrapped in the iterator loop
/// nest generated by OMPIteratorGeneratorScope.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // Emits the iterator loop nest (if any); the stores below are emitted into
  // the innermost loop body and repeated per iteration.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      // Compile-time position: address the element with a constant GEP.
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      // Runtime position: load the current index from the counter lvalue.
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
                  DependenciesArray.getAlignment()),
          KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the position: bump the compile-time counter or emit a runtime
    // increment of the index variable.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
4702 
/// Emits code that computes, for every depobj dependency in \p Data, the
/// number of kmp_depend_info records stored in that depobj. A depobj handle
/// points one element past a header record whose base_addr field holds the
/// element count, so the count is read from deps[-1].base_addr.
/// \returns one llvm::Value per dependency expression with its (runtime)
/// record count, loaded after the optional iterator scope has been closed.
static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  {
    // If the depend clause has an iterator modifier, everything emitted
    // inside this scope lands in the runtime loop the scope generates.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      // Load the void* stored in the depobj variable and reinterpret it as
      // a pointer to the first kmp_depend_info payload record.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      // Step back to the header record at deps[-1], which carries the count.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      // Accumulate the count in a temporary. NOTE: InitTempAlloca emits the
      // zero-store at the function's alloca insertion point, so when this
      // code is inside an iterator loop the load+add below accumulates the
      // per-iteration counts rather than resetting each time.
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.InitTempAlloca(NumLVal.getAddress(CGF),
                         llvm::ConstantInt::get(CGF.IntPtrTy, 0));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // Read the accumulated totals only after the iterator scope has closed, so
  // the loads see the final values.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}
4760 
/// Copies the dependency records stored in each depobj of \p Data into
/// \p DependenciesArray at the runtime index tracked by \p PosLVal, and
/// advances that index by the number of records copied. The per-depobj
/// record count is read from the header record at deps[-1].
static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  // Size of one kmp_depend_info record, used to scale the memcpy below.
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    // If the clause has an iterator modifier, the copy is emitted inside the
    // runtime loop generated by this scope.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      // Load the depobj handle and reinterpret it as kmp_depend_info*.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // memcopy dependency data.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr =
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
                  DependenciesArray.getAlignment());
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += NumDeps (the position counts records, not bytes).
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}
4821 
/// Emits the kmp_depend_info array for a task's depend clauses.
/// \returns the runtime element count and the array's address (null /
/// invalid when every clause is empty). Plain dependencies are counted at
/// compile time; depobj and iterator-guarded dependencies contribute
/// runtime-only counts, in which case a VLA is emitted instead of a
/// constant-sized local array. The array is filled in three passes: plain
/// deps, iterator-guarded deps, then depobj contents.
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  // Compile-time count covers only plain (non-depobj, non-iterator) deps.
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 1);
  // Calculate number of depobj dependecies and regular deps with the iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      // Each depobj knows its own record count; sum them at runtime.
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.
    // NOTE(review): when several clauses carry iterator modifiers their
    // iteration counts are multiplied into one running product (seeded with
    // 1), and the per-clause DepExprs count is not factored in. Verify
    // against newer upstream, which accumulates a per-clause product.
    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        NumOfRegularWithIterators =
            CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
      }
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    // Total size is known only at runtime: sum the static count with the
    // runtime contributions and emit a variable-length array.
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    // Wrap the runtime count in an OpaqueValueExpr so it can be used as the
    // VLA size expression below.
    OpaqueValueExpr OVE(Loc,
                        C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
                        VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamDecl::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    // Static count: a constant-sized local array is sufficient.
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  // First pass: plain dependencies use a compile-time position counter.
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Copy regular dependecies with iterators.
  // Second pass: iterator-guarded deps need a runtime position counter,
  // seeded with the static position reached above.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy);
  return std::make_pair(NumOfElements, DependenciesArray);
}
4942 
/// Allocates and fills the kmp_depend_info array backing an 'omp depobj'
/// construct. One extra leading record stores the number of dependencies in
/// its base_addr field (needed later by depobj(x) update(in) / destroy).
/// \returns the address one element past that header, i.e. the first real
/// record, cast to void*; invalid when the clause lists no dependencies.
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // Iterator modifier: the record count is the runtime product of all
    // iterator upper bounds; size = (count + 1 header) * sizeof(record).
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // No iterator: the count is known statically; include the header slot.
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  // addr = __kmpc_alloc(gtid, size, /*allocator=*/nullptr);
  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address(Addr, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // Fill the payload records starting at index 1 (past the header). With an
  // iterator modifier the position must be a runtime counter; otherwise a
  // compile-time index suffices.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return the address of the first payload record, as void*.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
  return DependenciesArray;
}
5025 
5026 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
5027                                         SourceLocation Loc) {
5028   ASTContext &C = CGM.getContext();
5029   QualType FlagsTy;
5030   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5031   LValue Base = CGF.EmitLoadOfPointerLValue(
5032       DepobjLVal.getAddress(CGF),
5033       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5034   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5035   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5036       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
5037   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5038       Addr.getPointer(),
5039       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5040   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
5041                                                                CGF.VoidPtrTy);
5042   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5043   // Use default allocator.
5044   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5045   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5046 
5047   // _kmpc_free(gtid, addr, nullptr);
5048   (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5049                                 CGM.getModule(), OMPRTL___kmpc_free),
5050                             Args);
5051 }
5052 
/// Implements 'omp depobj(x) update(<kind>)': emits a loop that rewrites the
/// flags field of every kmp_depend_info record stored in the depobj to the
/// runtime encoding of \p NewDepKind. Addresses and counts are recovered via
/// getDepobjElements.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  // NOTE(review): the body runs before the bounds check, so at least one
  // record is always written; presumably a depobj never holds zero records -
  // confirm against the depobj construct's constraints.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // PHI carries the current element pointer: the array start on entry from
  // EntryBB, the advanced pointer on each back edge.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Address(ElementPHI, Begin.getAlignment());
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5098 
/// Emits the code for an OpenMP task construct: allocates and initializes
/// the task object, emits the dependence array, then dispatches either the
/// deferred path (__kmpc_omp_task[_with_deps]) or, when the 'if' clause is
/// false, the undeferred path (__kmpc_omp_wait_deps + direct invocation
/// bracketed by __kmpc_omp_task_begin_if0/complete_if0).
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  // Allocate the task object and capture everything needed for the calls
  // below (entry point, typed task pointer, record layout).
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  // Only filled (and read) when there are dependences; no noalias deps are
  // ever passed, hence the trailing 0/null pair.
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // Deferred path: hand the task to the runtime.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied tasks restart from part 0 on each resumption.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  // Arguments for the dependency wait used on the undeferred (if0) path.
  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  // Undeferred path: wait for dependences, then run the task body inline.
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  // With an 'if' clause emit both paths behind a runtime branch; otherwise
  // the deferred path is taken unconditionally.
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
5216 
/// Emits the code for an OpenMP taskloop construct: allocates the task
/// object, stores the loop bounds/stride and reduction data into it, and
/// issues the __kmpc_taskloop runtime call with the 'if', grainsize /
/// num_tasks, and task-duplication parameters.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // 'if' clause value, passed to the runtime as an int (1 when absent).
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the lower bound, upper bound and stride fields of the task
  // record from the loop directive's helper variables.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // Scheduling encoding for __kmpc_taskloop's 'sched' parameter; the
  // Schedule pointer-int pair distinguishes grainsize (0) from num_tasks (1).
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}
5302 
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr Optional atomic-update operand, forwarded verbatim to
/// \p RedOpGen for each element.
/// \param EExpr Optional expression operand, forwarded verbatim to
/// \p RedOpGen.
/// \param UpExpr Optional update expression, forwarded verbatim to
/// \p RedOpGen.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely for zero-length arrays (begin == end).
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  // Remember the predecessor block; it feeds the initial values of the
  // element PHIs below.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI for the current source (RHS) element pointer.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // PHI for the current destination (LHS) element pointer.
  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Temporarily remap LHSVar/RHSVar to the current element addresses so the
  // combiner generator operates on single elements rather than whole arrays.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // Wire the back-edges from the block the builder currently sits in: the
  // combiner may have emitted extra basic blocks, so it is not necessarily
  // BodyBB itself.
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5382 
5383 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5384 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5385 /// UDR combiner function.
5386 static void emitReductionCombiner(CodeGenFunction &CGF,
5387                                   const Expr *ReductionOp) {
5388   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5389     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5390       if (const auto *DRE =
5391               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5392         if (const auto *DRD =
5393                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5394           std::pair<llvm::Function *, llvm::Function *> Reduction =
5395               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5396           RValue Func = RValue::get(Reduction.first);
5397           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5398           CGF.EmitIgnoredExpr(ReductionOp);
5399           return;
5400         }
5401   CGF.EmitIgnoredExpr(ReductionOp);
5402 }
5403 
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  // The helper is internal to this TU; it is only referenced by the
  // __kmpc_reduce{_nowait} call emitted in emitReduction().
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Remap every LHS/RHS variable to the corresponding slot of the incoming
  // void* arrays before emitting the combiners.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  // Idx tracks the array slot; it can run ahead of I because VLA items
  // occupy an extra slot holding their size.
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // The size was stored (as a pointer-sized integer) in the next slot by
      // the caller; load it and bind it to the VLA's opaque size expression.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5495 
5496 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5497                                                   const Expr *ReductionOp,
5498                                                   const Expr *PrivateRef,
5499                                                   const DeclRefExpr *LHS,
5500                                                   const DeclRefExpr *RHS) {
5501   if (PrivateRef->getType()->isArrayType()) {
5502     // Emit reduction for array section.
5503     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5504     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5505     EmitOMPAggregateReduction(
5506         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5507         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5508           emitReductionCombiner(CGF, ReductionOp);
5509         });
5510   } else {
5511     // Emit reduction for array subscript or single variable.
5512     emitReductionCombiner(CGF, ReductionOp);
5513   }
5514 }
5515 
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime coordination needed: just apply each combiner in place.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  // Idx tracks the slot in RedList; VLA items consume an extra slot for the
  // size, matching the layout expected by emitReductionFunction().
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      // The size is smuggled through the void* slot as an int-to-pointer
      // value; the reduction function converts it back with ptrtoint.
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  // The runtime returns 1 when this thread should run the tree-reduction
  // combiners, 2 when it should reduce atomically, and 0 otherwise.
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  // Wrap the combiners so __kmpc_end_reduce{_nowait} is emitted on exit,
  // including on the EH paths handled by CommonActionTy.
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      // BO_Comma acts as "no recognized operator" until the analysis below
      // finds a binary combiner.
      BinaryOperatorKind BO = BO_Comma;
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Fallback path: materialize the current value of X in a
                // temporary remapped to VD, then re-evaluate the update
                // expression against it.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5822 
5823 /// Generates unique name for artificial threadprivate variables.
5824 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5825 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5826                                       const Expr *Ref) {
5827   SmallString<256> Buffer;
5828   llvm::raw_svector_ostream Out(Buffer);
5829   const clang::DeclRefExpr *DE;
5830   const VarDecl *D = ::getBaseDecl(Ref, DE);
5831   if (!D)
5832     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5833   D = D->getCanonicalDecl();
5834   std::string Name = CGM.getOpenMPRuntime().getName(
5835       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5836   Out << Prefix << Name << "_"
5837       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5838   return std::string(Out.str());
5839 }
5840 
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
/// \param RCG Reduction codegen helper describing the reduction items.
/// \param N Index of the reduction item to emit the initializer for.
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue OrigLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer)
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // No UDR initializer: the original item is unused, pass a null pointer.
    OrigLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
5909 
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
/// \param RCG Reduction codegen helper describing the reduction items.
/// \param N Index of the reduction item to emit the combiner for.
/// \param ReductionOp Combiner expression to emit.
/// \param LHS Reference to the in/out (accumulator) variable.
/// \param RHS Reference to the in (incoming) variable.
/// \param PrivateRef Reference to the private copy; its type decides between
/// scalar and array-section emission.
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
5987 
/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
/// \param RCG Reduction codegen helper describing the reduction items.
/// \param N Index of the reduction item to emit the finalizer for.
/// \returns The finalizer function, or nullptr if item \p N needs no cleanup.
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // Skip emitting a function entirely when the item is trivially destructible.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}
6036 
// Emits the runtime setup for task reductions: builds an array of
// kmp_taskred_input_t descriptors (one per reduction item) and passes it to
// __kmpc_taskred_init or, for reductions with a task modifier,
// __kmpc_taskred_modifier_init. Returns the handle returned by the runtime,
// or nullptr if there is nothing to emit.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  // Fill one kmp_taskred_input_t element per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    // ElemLVal.reduce_size = size;
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini; (null when the item needs no cleanups —
    // emitReduceFiniFunction returns nullptr in that case).
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = DelayedCreation ? 1 : 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
6165 
6166 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6167                                             SourceLocation Loc,
6168                                             bool IsWorksharingReduction) {
6169   // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6170   // is_ws, int num, void *data);
6171   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6172   llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6173                                                 CGM.IntTy, /*isSigned=*/true);
6174   llvm::Value *Args[] = {IdentTLoc, GTid,
6175                          llvm::ConstantInt::get(CGM.IntTy,
6176                                                 IsWorksharingReduction ? 1 : 0,
6177                                                 /*isSigned=*/true)};
6178   (void)CGF.EmitRuntimeCall(
6179       OMPBuilder.getOrCreateRuntimeFunction(
6180           CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6181       Args);
6182 }
6183 
6184 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6185                                               SourceLocation Loc,
6186                                               ReductionCodeGen &RCG,
6187                                               unsigned N) {
6188   auto Sizes = RCG.getSizes(N);
6189   // Emit threadprivate global variable if the type is non-constant
6190   // (Sizes.second = nullptr).
6191   if (Sizes.second) {
6192     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6193                                                      /*isSigned=*/false);
6194     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6195         CGF, CGM.getContext().getSizeType(),
6196         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6197     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6198   }
6199 }
6200 
6201 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6202                                               SourceLocation Loc,
6203                                               llvm::Value *ReductionsPtr,
6204                                               LValue SharedLVal) {
6205   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6206   // *d);
6207   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6208                                                    CGM.IntTy,
6209                                                    /*isSigned=*/true),
6210                          ReductionsPtr,
6211                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6212                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6213   return Address(
6214       CGF.EmitRuntimeCall(
6215           OMPBuilder.getOrCreateRuntimeFunction(
6216               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6217           Args),
6218       SharedLVal.getAlignment());
6219 }
6220 
6221 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6222                                        SourceLocation Loc) {
6223   if (!CGF.HaveInsertPoint())
6224     return;
6225 
6226   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
6227     OMPBuilder.CreateTaskwait(CGF.Builder);
6228   } else {
6229     // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6230     // global_tid);
6231     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6232     // Ignore return result until untied tasks are supported.
6233     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6234                             CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
6235                         Args);
6236   }
6237 
6238   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6239     Region->emitUntiedSwitch(CGF);
6240 }
6241 
6242 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6243                                            OpenMPDirectiveKind InnerKind,
6244                                            const RegionCodeGenTy &CodeGen,
6245                                            bool HasCancel) {
6246   if (!CGF.HaveInsertPoint())
6247     return;
6248   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
6249   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6250 }
6251 
namespace {
/// Cancellation kinds; the numeric values are emitted directly as the
/// kmp_int32 'cncl_kind' argument of the __kmpc_cancel and
/// __kmpc_cancellationpoint runtime calls.
enum RTCancelKind {
  CancelNoreq = 0,     // no cancellation requested
  CancelParallel = 1,  // cancel a 'parallel' region
  CancelLoop = 2,      // cancel a worksharing loop ('for') region
  CancelSections = 3,  // cancel a 'sections' region
  CancelTaskgroup = 4  // cancel a 'taskgroup' region
};
} // anonymous namespace
6261 
6262 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6263   RTCancelKind CancelKind = CancelNoreq;
6264   if (CancelRegion == OMPD_parallel)
6265     CancelKind = CancelParallel;
6266   else if (CancelRegion == OMPD_for)
6267     CancelKind = CancelLoop;
6268   else if (CancelRegion == OMPD_sections)
6269     CancelKind = CancelSections;
6270   else {
6271     assert(CancelRegion == OMPD_taskgroup);
6272     CancelKind = CancelTaskgroup;
6273   }
6274   return CancelKind;
6275 }
6276 
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      // A non-zero result means cancellation was activated: branch out of the
      // construct through any pending cleanups.
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
6313 
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // Code emitted when the cancel is taken (unconditionally, or in the
    // 'then' branch when an 'if' clause is present).
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      // A non-zero result means cancellation was activated: branch out of the
      // construct through any pending cleanups.
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // Guard the cancel with the 'if' clause condition.
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6356 
6357 namespace {
6358 /// Cleanup action for uses_allocators support.
6359 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6360   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6361 
6362 public:
6363   OMPUsesAllocatorsActionTy(
6364       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6365       : Allocators(Allocators) {}
6366   void Enter(CodeGenFunction &CGF) override {
6367     if (!CGF.HaveInsertPoint())
6368       return;
6369     for (const auto &AllocatorData : Allocators) {
6370       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6371           CGF, AllocatorData.first, AllocatorData.second);
6372     }
6373   }
6374   void Exit(CodeGenFunction &CGF) override {
6375     if (!CGF.HaveInsertPoint())
6376       return;
6377     for (const auto &AllocatorData : Allocators) {
6378       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6379                                                         AllocatorData.first);
6380     }
6381   }
6382 };
6383 } // namespace
6384 
6385 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6386     const OMPExecutableDirective &D, StringRef ParentName,
6387     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6388     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6389   assert(!ParentName.empty() && "Invalid target region parent name!");
6390   HasEmittedTargetRegion = true;
6391   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6392   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6393     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6394       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6395       if (!D.AllocatorTraits)
6396         continue;
6397       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6398     }
6399   }
6400   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6401   CodeGen.setAction(UsesAllocatorAction);
6402   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6403                                    IsOffloadEntry, CodeGen);
6404 }
6405 
// Creates one custom allocator for a 'uses_allocators' clause entry: emits
//   void *__kmpc_init_allocator(int gtid, void *memspace, int ntraits, traits)
// and stores the returned handle into the allocator variable.
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // The number of traits is the constant array bound of the traits expression.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  // Reinterpret the traits array address as 'void **' and load it to form the
  // 'traits' argument of the runtime call.
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  // Emit the allocator variable declaration itself, then store the handle
  // (converted from 'void *' to the allocator's declared type) into it.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}
6440 
6441 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6442                                              const Expr *Allocator) {
6443   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6444   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6445   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6446   llvm::Value *AllocatorVal =
6447       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6448   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6449                                           CGF.getContext().VoidPtrTy,
6450                                           Allocator->getExprLoc());
6451   (void)CGF.EmitRuntimeCall(
6452       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6453                                             OMPRTL___kmpc_destroy_allocator),
6454       {ThreadId, AllocatorVal});
6455 }
6456 
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Outline the captured region body into a function with the entry name.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also mark
  // the outlined function to have external linkage in case we are emitting code
  // for the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
  } else {
    // On the host the ID is just a uniquely named constant global.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}
6523 
6524 /// Checks if the expression is constant or does not have non-trivial function
6525 /// calls.
6526 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6527   // We can skip constant expressions.
6528   // We can skip expressions with trivial calls or simple expressions.
6529   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6530           !E->hasNonTrivialCall(Ctx)) &&
6531          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6532 }
6533 
// Returns the single significant child statement of \p Body, descending
// through compound statements and skipping statements with no codegen effect;
// returns nullptr if more than one significant child is found.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        // Trivial expressions (constants, trivial calls, no side effects)
        // can be ignored.
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
              // Declarations without runtime effect are ignorable.
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              // Variables are ignorable if constexpr, or of trivial/reference
              // type with an absent or trivial initializer.
              return VD->isConstexpr() ||
                     ((VD->getType().isTrivialType(Ctx) ||
                       VD->getType()->isReferenceType()) &&
                      (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    // Keep unwrapping containers around the single child, if any.
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6578 
/// Emit the number of teams for a target directive.  Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit a team of size one for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': look at the single significant nested statement, if
    // any, to decide the number of teams.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        // Nested 'teams' with num_teams: evaluate the clause expression in
        // the context of the enclosing captured statement.
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        // Nested 'teams' without num_teams clause.
        return Bld.getInt32(0);
      }
      // A nested parallel/simd region runs as a single team.
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      return Bld.getInt32(0);
    }
    // No single nested directive could be identified.
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target+teams directives: the num_teams clause, if present, is
    // on the directive itself.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // Directives with no associated teams construct: one team.
    return Bld.getInt32(1);
  // None of the remaining directive kinds is a target-based executable
  // directive; reaching this function with one of them is a bug in the
  // caller (see the assertion above).
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
6711 
/// Compute the number of threads to be requested for the target region whose
/// captured statement is \p CS, by inspecting a parallel (or simd) directive
/// nested directly inside it.
///
/// Returns i32 1 for a nested simd directive, the computed num_threads value
/// (clamped by \p DefaultThreadLimitVal when present) for a nested parallel
/// directive, and otherwise \p DefaultThreadLimitVal itself (which may be
/// null, meaning "no information") or i32 0 ("use the runtime default").
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  // Only a single executable directive immediately nested inside the captured
  // statement is considered.
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        // Pick the 'if' clause that applies to 'parallel' (a clause without a
        // name modifier also applies).
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Constant-false condition serializes the parallel region, so
            // exactly one thread is used.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Emit the clause's pre-init declarations first so the condition
            // expression can reference them.
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  // No-init capture: allocate storage and register cleanups
                  // without emitting an initializer.
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        // Emit the clause's pre-init declarations before evaluating the
        // num_threads expression.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp by the inherited thread limit:
        // min(DefaultThreadLimitVal, NumThreads).
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        // No num_threads clause: fall back to the thread limit, or 0 meaning
        // "let the runtime decide".
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    // A nested simd region always runs with a single thread.
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}
6803 
/// Emit the number of threads for a target directive.  Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
///
/// NOTE: must only be called on the host; device-side clause emission is
/// handled elsewhere (see the assertion below).
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': look inside the captured region for nested constructs
    // that determine the thread count.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // A closely nested teams construct may carry a thread_limit clause.
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        // Emit the clause's pre-init declarations before evaluating the
        // thread_limit expression.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              // No-init capture: allocate storage and cleanups only.
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // Step through a teams (non-distribute) construct to inspect what it
      // encloses.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      // A nested distribute (non-simd) construct may itself contain a
      // parallel region that fixes the thread count.
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      // simd regions run single-threaded.
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    // Fall back to the thread limit (if any) or 0 ("runtime default").
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    // thread_limit is on the combined directive itself.
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // Look through a nested plain 'distribute' for a parallel region.
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined directives with a 'parallel' part: combine the if, thread_limit
    // and num_threads clauses that appear on the directive itself.
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      // Pick the 'if' clause that applies to 'parallel' (no name modifier
      // also applies).
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Constant-false condition serializes the region: one thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Effective limit is min(num_threads, thread_limit) when both exist.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    // A runtime-evaluated 'if' condition selects between the computed value
    // and serialized execution (1 thread).
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    // Combined simd targets run single-threaded.
    return Bld.getInt32(1);
  // The remaining directive kinds are not target-based; reaching this switch
  // with any of them indicates a caller bug (see the assertion above).
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7028 
7029 namespace {
7030 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7031 
7032 // Utility to handle information from clauses associated with a given
7033 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7034 // It provides a convenient interface to obtain the information and generate
7035 // code for that information.
7036 class MappableExprsHandler {
7037 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  /// NOTE(review): these bit values look like part of the ABI shared with the
  /// offloading runtime library — confirm against the runtime's flag
  /// definitions before adding or renumbering entries.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// 0x800 is reserved for compatibility with XLC.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7081 
7082   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7083   static unsigned getFlagMemberOffset() {
7084     unsigned Offset = 0;
7085     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7086          Remain = Remain >> 1)
7087       Offset++;
7088     return Offset;
7089   }
7090 
7091   /// Class that associates information with a base pointer to be passed to the
7092   /// runtime library.
7093   class BasePointerInfo {
7094     /// The base pointer.
7095     llvm::Value *Ptr = nullptr;
7096     /// The base declaration that refers to this device pointer, or null if
7097     /// there is none.
7098     const ValueDecl *DevPtrDecl = nullptr;
7099 
7100   public:
7101     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7102         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7103     llvm::Value *operator*() const { return Ptr; }
7104     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7105     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7106   };
7107 
  // Convenience array types for the per-component mapping information handed
  // to the runtime: base pointers, pointers/sizes, map-type flag words, and
  // user-defined mapper declarations.
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
7112 
7113   /// This structure contains combined information generated for mappable
7114   /// clauses, including base pointers, pointers, sizes, map types, and
7115   /// user-defined mappers.
7116   struct MapCombinedInfoTy {
7117     MapBaseValuesArrayTy BasePointers;
7118     MapValuesArrayTy Pointers;
7119     MapValuesArrayTy Sizes;
7120     MapFlagsArrayTy Types;
7121     MapMappersArrayTy Mappers;
7122 
7123     /// Append arrays in \a CurInfo.
7124     void append(MapCombinedInfoTy &CurInfo) {
7125       BasePointers.append(CurInfo.BasePointers.begin(),
7126                           CurInfo.BasePointers.end());
7127       Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
7128       Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
7129       Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
7130       Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7131     }
7132   };
7133 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    // Lowest mapped element: field index within the struct plus its address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    // Highest mapped element: field index within the struct plus its address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    // Address of the struct itself.
    Address Base = Address::invalid();
  };
7145 
7146 private:
  /// Information gathered from one map/motion clause for a single
  /// mappable-expression component list, together with how the entry must be
  /// emitted (implicit, device-pointer return, device-address form).
  struct MapInfo {
    // The component list (base expression plus member/array accesses).
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    // Map kind (to/from/tofrom/alloc/release/delete).
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    // Map-clause modifiers (always, close, present, ...).
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    // Motion-clause (to/from) modifiers, e.g. present.
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    // True if the runtime must return the device pointer for this entry.
    bool ReturnDevicePointer = false;
    // True when the map was generated implicitly rather than written by the
    // user.
    bool IsImplicit = false;
    // User-defined mapper attached to this map, if any.
    const ValueDecl *Mapper = nullptr;
    // True when the entry comes from use_device_addr (address, not pointer).
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), ForDeviceAddr(ForDeviceAddr) {}
  };
7171 
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    // Expression designating the struct member.
    const Expr *IE = nullptr;
    // Declaration named by the use_device_ptr/use_device_addr clause.
    const ValueDecl *VD = nullptr;
    // True when the entry originates from use_device_addr.
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
7184 
  /// The directive from which the mappable clauses were extracted. It is
  /// either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;
7205 
7206   llvm::Value *getExprTypeSize(const Expr *E) const {
7207     QualType ExprTy = E->getType().getCanonicalType();
7208 
7209     // Calculate the size for array shaping expression.
7210     if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7211       llvm::Value *Size =
7212           CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7213       for (const Expr *SE : OAE->getDimensions()) {
7214         llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7215         Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7216                                       CGF.getContext().getSizeType(),
7217                                       SE->getExprLoc());
7218         Size = CGF.Builder.CreateNUWMul(Size, Sz);
7219       }
7220       return Size;
7221     }
7222 
7223     // Reference types are ignored for mapping purposes.
7224     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7225       ExprTy = RefTy->getPointeeType().getCanonicalType();
7226 
7227     // Given that an array section is considered a built-in type, we need to
7228     // do the calculation based on the length of the section instead of relying
7229     // on CGF.getTypeSize(E->getType()).
7230     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7231       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7232                             OAE->getBase()->IgnoreParenImpCasts())
7233                             .getCanonicalType();
7234 
7235       // If there is no length associated with the expression and lower bound is
7236       // not specified too, that means we are using the whole length of the
7237       // base.
7238       if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7239           !OAE->getLowerBound())
7240         return CGF.getTypeSize(BaseTy);
7241 
7242       llvm::Value *ElemSize;
7243       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7244         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7245       } else {
7246         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7247         assert(ATy && "Expecting array type if not a pointer type.");
7248         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7249       }
7250 
7251       // If we don't have a length at this point, that is because we have an
7252       // array section with a single element.
7253       if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
7254         return ElemSize;
7255 
7256       if (const Expr *LenExpr = OAE->getLength()) {
7257         llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7258         LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7259                                              CGF.getContext().getSizeType(),
7260                                              LenExpr->getExprLoc());
7261         return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7262       }
7263       assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7264              OAE->getLowerBound() && "expected array_section[lb:].");
7265       // Size = sizetype - lb * elemtype;
7266       llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7267       llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7268       LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7269                                        CGF.getContext().getSizeType(),
7270                                        OAE->getLowerBound()->getExprLoc());
7271       LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7272       llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7273       llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7274       LengthVal = CGF.Builder.CreateSelect(
7275           Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7276       return LengthVal;
7277     }
7278     return CGF.getTypeSize(ExprTy);
7279   }
7280 
7281   /// Return the corresponding bits for a given map clause modifier. Add
7282   /// a flag marking the map as a pointer if requested. Add a flag marking the
7283   /// map as the first one of a series of maps that relate to the same map
7284   /// expression.
7285   OpenMPOffloadMappingFlags getMapTypeBits(
7286       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7287       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7288       bool AddPtrFlag, bool AddIsTargetParamFlag) const {
7289     OpenMPOffloadMappingFlags Bits =
7290         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7291     switch (MapType) {
7292     case OMPC_MAP_alloc:
7293     case OMPC_MAP_release:
7294       // alloc and release is the default behavior in the runtime library,  i.e.
7295       // if we don't pass any bits alloc/release that is what the runtime is
7296       // going to do. Therefore, we don't need to signal anything for these two
7297       // type modifiers.
7298       break;
7299     case OMPC_MAP_to:
7300       Bits |= OMP_MAP_TO;
7301       break;
7302     case OMPC_MAP_from:
7303       Bits |= OMP_MAP_FROM;
7304       break;
7305     case OMPC_MAP_tofrom:
7306       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7307       break;
7308     case OMPC_MAP_delete:
7309       Bits |= OMP_MAP_DELETE;
7310       break;
7311     case OMPC_MAP_unknown:
7312       llvm_unreachable("Unexpected map type!");
7313     }
7314     if (AddPtrFlag)
7315       Bits |= OMP_MAP_PTR_AND_OBJ;
7316     if (AddIsTargetParamFlag)
7317       Bits |= OMP_MAP_TARGET_PARAM;
7318     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7319         != MapModifiers.end())
7320       Bits |= OMP_MAP_ALWAYS;
7321     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7322         != MapModifiers.end())
7323       Bits |= OMP_MAP_CLOSE;
7324     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present)
7325         != MapModifiers.end())
7326       Bits |= OMP_MAP_PRESENT;
7327     if (llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present)
7328         != MotionModifiers.end())
7329       Bits |= OMP_MAP_PRESENT;
7330     return Bits;
7331   }
7332 
7333   /// Return true if the provided expression is a final array section. A
7334   /// final array section, is one whose length can't be proved to be one.
7335   bool isFinalArraySectionExpression(const Expr *E) const {
7336     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7337 
7338     // It is not an array section and therefore not a unity-size one.
7339     if (!OASE)
7340       return false;
7341 
7342     // An array section with no colon always refer to a single element.
7343     if (OASE->getColonLocFirst().isInvalid())
7344       return false;
7345 
7346     const Expr *Length = OASE->getLength();
7347 
7348     // If we don't have a length we have to check if the array has size 1
7349     // for this dimension. Also, we should always expect a length if the
7350     // base type is pointer.
7351     if (!Length) {
7352       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7353                              OASE->getBase()->IgnoreParenImpCasts())
7354                              .getCanonicalType();
7355       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7356         return ATy->getSize().getSExtValue() != 1;
7357       // If we don't have a constant dimension length, we have to consider
7358       // the current section as having any size, so it is not necessarily
7359       // unitary. If it happen to be unity size, that's user fault.
7360       return true;
7361     }
7362 
7363     // Check if the length evaluates to 1.
7364     Expr::EvalResult Result;
7365     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7366       return true; // Can have more that size 1.
7367 
7368     llvm::APSInt ConstLength = Result.Val.getInt();
7369     return ConstLength.getSExtValue() != 1;
7370   }
7371 
  /// Generate the base pointers, section pointers, sizes, map type bits, and
  /// user-defined mappers (all included in \a CombinedInfo) for the provided
  /// map type, map or motion modifiers, and expression components.
  /// \a IsFirstComponent should be set to true if the provided set of
  /// components is the first associated with a capture.
  ///
  /// \param MapType Map type of the originating map/to/from clause.
  /// \param MapModifiers Map modifiers (always/close/present) of the clause.
  /// \param MotionModifiers Motion modifiers of a to/from clause, if any.
  /// \param Components Expression components to generate the info for.
  /// \param CombinedInfo Output arrays the generated entries are appended to.
  /// \param PartialStruct Output info about a partially mapped struct, filled
  ///        when a member expression is encountered.
  /// \param IsImplicit True if the originating clause was implicit.
  /// \param Mapper User-defined mapper inherited by the last component.
  /// \param ForDeviceAddr If true, member components are treated like
  ///        pointers when deciding whether a separate entry can be skipped.
  /// \param OverlappedElements Component lists overlapping this one; when
  ///        non-empty, only the non-overlapped parts of the base element are
  ///        emitted as bitcopy entries.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
      bool IsFirstComponentList, bool IsImplicit,
      const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = llvm::None) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
    // in unified shared memory mode or for local pointers
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (*), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    // When the variable is on a declare target link or in a to clause with
    // unified memory, a reference is needed to hold the host/device address
    // of the variable.
    bool RequiresReference = false;

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    // Set when the base of a complex expression is a global pointer whose
    // load must be deferred until we know whether the expression is actually
    // array-like (see the MemberExpr handling in the loop below).
    bool FirstPointerInComplexData = false;
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
    const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);

    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
    } else if (OAShE &&
               isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
      BP = Address(
          CGF.EmitScalarExpr(OAShE->getBase()),
          CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
            RequiresReference = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
          }
        }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        // No need to generate individual map information for the pointer, it
        // can be associated with the combined storage if shared memory mode is
        // active or the base declaration is not global variable.
        const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
        if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
            !VD || VD->hasLocalStorage())
          BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        else
          FirstPointerInComplexData = true;
        ++I;
      }
    }

    // Track whether a component of the list should be marked as MEMBER_OF some
    // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
    // in a component list should be marked as MEMBER_OF, all subsequent entries
    // do not belong to the base struct. E.g.
    // struct S2 s;
    // s.ps->ps->ps->f[:]
    //   (1) (2) (3) (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
    // is the pointee of ps(2) which is not member of struct s, so it should not
    // be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a component
    // in the component list which is a member expression. Useful when we have a
    // pointer or a final array section, in which case it is the previous
    // component in the list which tells us whether we have a member expression.
    // E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells us
    // whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    for (; I != CE; ++I) {
      // If the current component is member of a struct (parent struct) mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME) {
          ShouldBeMemberOf = true;
          // Do not emit as complex pointer if this is actually not array-like
          // expression.
          if (FirstPointerInComplexData) {
            QualType Ty = std::prev(I)
                              ->getAssociatedDeclaration()
                              ->getType()
                              .getNonReferenceType();
            BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
            FirstPointerInComplexData = false;
          }
        }
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section, is one whose length can't be proved to be one.
      bool IsFinalArraySection =
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      const auto *OAShE =
          dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
      const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
      const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
      bool IsPointer =
          OAShE ||
          (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();
      // A pointer that appears under a unary or binary operator (i.e. a
      // dereference) is not treated as a standalone pointer component here.
      bool IsNonDerefPointer = IsPointer && !UO && !BO;

      if (Next == CE || IsNonDerefPointer || IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
                isa<UnaryOperator>(Next->getAssociatedExpression()) ||
                isa<BinaryOperator>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        Address LB = Address::invalid();
        if (OAShE) {
          LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
                       CGF.getContext().getTypeAlignInChars(
                           OAShE->getBase()->getType()));
        } else {
          LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
                   .getAddress(CGF);
        }

        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the object
        // it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointerOrAddr =
            (IsPointer || ForDeviceAddr) && EncounteredME &&
            (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
             EncounteredME);
        if (!OverlappedElements.empty()) {
          // Handle base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(Next == CE &&
                 "Expected last element for the overlapped elements.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on the
          // device.
          PartialStruct.LowestElem = {0, LB};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
                                                              CGF.VoidPtrTy),
              TypeSize.getQuantity() - 1);
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          // Emit data for non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false);
          // Start the non-overlapped copy ranges at the base of the element.
          LB = BP;
          llvm::Value *Size = nullptr;
          // Do bitcopy of all non-overlapped structure elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (MC.getAssociatedDeclaration()) {
                ComponentLB =
                    CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                        .getAddress(CGF);
                Size = CGF.Builder.CreatePtrDiff(
                    CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
                    CGF.EmitCastToVoidPtr(LB.getPointer()));
                break;
              }
            }
            CombinedInfo.BasePointers.push_back(BP.getPointer());
            CombinedInfo.Pointers.push_back(LB.getPointer());
            CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
                Size, CGF.Int64Ty, /*isSigned=*/true));
            CombinedInfo.Types.push_back(Flags);
            CombinedInfo.Mappers.push_back(nullptr);
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
          }
          CombinedInfo.BasePointers.push_back(BP.getPointer());
          CombinedInfo.Pointers.push_back(LB.getPointer());
          Size = CGF.Builder.CreatePtrDiff(
              CGF.EmitCastToVoidPtr(
                  CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
              CGF.EmitCastToVoidPtr(LB.getPointer()));
          CombinedInfo.Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          CombinedInfo.Types.push_back(Flags);
          CombinedInfo.Mappers.push_back(nullptr);
          break;
        }
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        if (!IsMemberPointerOrAddr) {
          CombinedInfo.BasePointers.push_back(BP.getPointer());
          CombinedInfo.Pointers.push_back(LB.getPointer());
          CombinedInfo.Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));

          // If Mapper is valid, the last component inherits the mapper.
          bool HasMapper = Mapper && Next == CE;
          CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags =
              getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
                             !IsExpressionFirstInfo || RequiresReference ||
                                 FirstPointerInComplexData,
                             IsCaptureFirstInfo && !RequiresReference);

          if (!IsExpressionFirstInfo) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
            // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
            if (IsPointer)
              Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
                         OMP_MAP_DELETE | OMP_MAP_CLOSE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
              // should be later updated with the correct value of MEMBER_OF.
              Flags |= OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          CombinedInfo.Types.push_back(Flags);
        }

        // If we have encountered a member expression so far, keep track of the
        // mapped member. If the parent is "*this", then the value declaration
        // is nullptr.
        if (EncounteredME) {
          const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this struct
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LB};
            if (IsFinalArraySection) {
              Address HB =
                  CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
                      .getAddress(CGF);
              PartialStruct.HighestElem = {FieldIndex, HB};
            } else {
              PartialStruct.HighestElem = {FieldIndex, LB};
            }
            PartialStruct.Base = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LB};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            PartialStruct.HighestElem = {FieldIndex, LB};
          }
        }

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
        FirstPointerInComplexData = false;
      }
    }
  }
7864 
7865   /// Return the adjusted map modifiers if the declaration a capture refers to
7866   /// appears in a first-private clause. This is expected to be used only with
7867   /// directives that start with 'target'.
7868   MappableExprsHandler::OpenMPOffloadMappingFlags
7869   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7870     assert(Cap.capturesVariable() && "Expected capture by reference only!");
7871 
7872     // A first private variable captured by reference will use only the
7873     // 'private ptr' and 'map to' flag. Return the right flags if the captured
7874     // declaration is known as first-private in this handler.
7875     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7876       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
7877           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
7878         return MappableExprsHandler::OMP_MAP_ALWAYS |
7879                MappableExprsHandler::OMP_MAP_TO;
7880       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7881         return MappableExprsHandler::OMP_MAP_TO |
7882                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
7883       return MappableExprsHandler::OMP_MAP_PRIVATE |
7884              MappableExprsHandler::OMP_MAP_TO;
7885     }
7886     return MappableExprsHandler::OMP_MAP_TO |
7887            MappableExprsHandler::OMP_MAP_FROM;
7888   }
7889 
  /// Encode the (zero-based) \a Position of the parent struct entry into the
  /// MEMBER_OF field of the mapping flags. The stored value is one-based, so
  /// 0 can mean "no member-of information".
  static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
    // Shift the one-based position left by getFlagMemberOffset() bits to
    // place it in the MEMBER_OF bits of the flag word.
    return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
                                                  << getFlagMemberOffset());
  }
7895 
7896   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
7897                                      OpenMPOffloadMappingFlags MemberOfFlag) {
7898     // If the entry is PTR_AND_OBJ but has not been marked with the special
7899     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
7900     // marked as MEMBER_OF.
7901     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
7902         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
7903       return;
7904 
7905     // Reset the placeholder value to prepare the flag for the assignment of the
7906     // proper MEMBER_OF value.
7907     Flags &= ~OMP_MAP_MEMBER_OF;
7908     Flags |= MemberOfFlag;
7909   }
7910 
7911   void getPlainLayout(const CXXRecordDecl *RD,
7912                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7913                       bool AsBase) const {
7914     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7915 
7916     llvm::StructType *St =
7917         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7918 
7919     unsigned NumElements = St->getNumElements();
7920     llvm::SmallVector<
7921         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7922         RecordLayout(NumElements);
7923 
7924     // Fill bases.
7925     for (const auto &I : RD->bases()) {
7926       if (I.isVirtual())
7927         continue;
7928       const auto *Base = I.getType()->getAsCXXRecordDecl();
7929       // Ignore empty bases.
7930       if (Base->isEmpty() || CGF.getContext()
7931                                  .getASTRecordLayout(Base)
7932                                  .getNonVirtualSize()
7933                                  .isZero())
7934         continue;
7935 
7936       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7937       RecordLayout[FieldIndex] = Base;
7938     }
7939     // Fill in virtual bases.
7940     for (const auto &I : RD->vbases()) {
7941       const auto *Base = I.getType()->getAsCXXRecordDecl();
7942       // Ignore empty bases.
7943       if (Base->isEmpty())
7944         continue;
7945       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7946       if (RecordLayout[FieldIndex])
7947         continue;
7948       RecordLayout[FieldIndex] = Base;
7949     }
7950     // Fill in all the fields.
7951     assert(!RD->isUnion() && "Unexpected union.");
7952     for (const auto *Field : RD->fields()) {
7953       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
7954       // will fill in later.)
7955       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
7956         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
7957         RecordLayout[FieldIndex] = Field;
7958       }
7959     }
7960     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
7961              &Data : RecordLayout) {
7962       if (Data.isNull())
7963         continue;
7964       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
7965         getPlainLayout(Base, Layout, /*AsBase=*/true);
7966       else
7967         Layout.push_back(Data.get<const FieldDecl *>());
7968     }
7969   }
7970 
7971 public:
7972   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
7973       : CurDir(&Dir), CGF(CGF) {
7974     // Extract firstprivate clause information.
7975     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
7976       for (const auto *D : C->varlists())
7977         FirstPrivateDecls.try_emplace(
7978             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
7979     // Extract implicit firstprivates from uses_allocators clauses.
7980     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
7981       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
7982         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
7983         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
7984           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
7985                                         /*Implicit=*/true);
7986         else if (const auto *VD = dyn_cast<VarDecl>(
7987                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
7988                          ->getDecl()))
7989           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
7990       }
7991     }
7992     // Extract device pointer clause information.
7993     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
7994       for (auto L : C->component_lists())
7995         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
7996   }
7997 
  /// Constructor for the declare mapper directive. Unlike the executable
  /// directive constructor above, no clause information is gathered here.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
8001 
8002   /// Generate code for the combined entry if we have a partially mapped struct
8003   /// and take care of the mapping flags of the arguments corresponding to
8004   /// individual struct members.
8005   void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8006                          MapFlagsArrayTy &CurTypes,
8007                          const StructRangeInfoTy &PartialStruct) const {
8008     // Base is the base of the struct
8009     CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
8010     // Pointer is the address of the lowest element
8011     llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
8012     CombinedInfo.Pointers.push_back(LB);
8013     // There should not be a mapper for a combined entry.
8014     CombinedInfo.Mappers.push_back(nullptr);
8015     // Size is (addr of {highest+1} element) - (addr of lowest element)
8016     llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
8017     llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
8018     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8019     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8020     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
8021     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8022                                                   /*isSigned=*/false);
8023     CombinedInfo.Sizes.push_back(Size);
8024     // Map type is always TARGET_PARAM
8025     CombinedInfo.Types.push_back(OMP_MAP_TARGET_PARAM);
8026     // If any element has the present modifier, then make sure the runtime
8027     // doesn't attempt to allocate the struct.
8028     if (CurTypes.end() !=
8029         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8030           return Type & OMP_MAP_PRESENT;
8031         }))
8032       CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
8033     // Remove TARGET_PARAM flag from the first element
8034     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
8035 
8036     // All other current entries will be MEMBER_OF the combined entry
8037     // (except for PTR_AND_OBJ entries which do not have a placeholder value
8038     // 0xFFFF in the MEMBER_OF field).
8039     OpenMPOffloadMappingFlags MemberOfFlag =
8040         getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
8041     for (auto &M : CurTypes)
8042       setCorrectMemberOfFlag(M, MemberOfFlag);
8043   }
8044 
  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  ///
  /// \param CombinedInfo Accumulator for the generated mapping information.
  /// \param SkipVarSet Canonical declarations whose component lists must be
  ///        ignored (already handled elsewhere by the caller).
  void generateAllInfo(
      MapCombinedInfoTy &CombinedInfo,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map. Member-expression
    // lists rooted at 'this' are keyed under nullptr.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses. Keys each component list under the canonical declaration
    // (nullptr for 'this') and drops declarations listed in SkipVarSet.
    auto &&InfoGen =
        [&Info, &SkipVarSet](
            const ValueDecl *D,
            OMPClauseMappableExprCommon::MappableExprComponentListRef L,
            OpenMPMapClauseKind MapType,
            ArrayRef<OpenMPMapModifierKind> MapModifiers,
            ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
            bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
            bool ForDeviceAddr = false) {
          const ValueDecl *VD =
              D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
          if (SkipVarSet.count(VD))
            return;
          Info[VD].emplace_back(L, MapType, MapModifiers, MotionModifiers,
                                ReturnDevicePointer, IsImplicit, Mapper,
                                ForDeviceAddr);
        };

    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    // Collect component lists from map clauses, and from to/from clauses of
    // 'target update' directives (encoded as map-type to/from respectively).
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), std::get<1>(L), C->getMapType(),
                C->getMapTypeModifiers(), llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L));
      }
    for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), std::get<1>(L), OMPC_MAP_to, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L));
      }
    for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), std::get<1>(L), OMPC_MAP_from, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L));
      }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;
    // Entries for use_device_ptr list items without prior map information that
    // are not struct members; appended after all other entries at the end.
    MapCombinedInfoTy UseDevicePtrCombinedInfo;

    for (const auto *C :
         CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
            return MI.Components.back().getAssociatedDeclaration() == VD;
          });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          // Exclude cases where the base pointer is mapped as array subscript,
          // array section or array shaping. The base address is passed as a
          // pointer to base in this case and cannot be used as a base for
          // use_device_ptr list item.
          if (CI != It->second.end()) {
            auto PrevCI = std::next(CI->Components.rbegin());
            const auto *VarD = dyn_cast<VarDecl>(VD);
            if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
                isa<MemberExpr>(IE) ||
                !VD->getType().getNonReferenceType()->isPointerType() ||
                PrevCI == CI->Components.rend() ||
                isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
                VarD->hasLocalStorage()) {
              CI->ReturnDevicePointer = true;
              continue;
            }
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, Components, OMPC_MAP_unknown, llvm::None, llvm::None,
                  /*ReturnDevicePointer=*/false, C->isImplicit(), nullptr);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
        } else {
          // Not a struct member: load the pointer value and emit a
          // RETURN_PARAM entry of size zero directly.
          llvm::Value *Ptr =
              CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
          UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
          UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
          UseDevicePtrCombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM |
                                                   OMP_MAP_TARGET_PARAM);
          UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    // Look at the use_device_addr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_addr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *C :
         CurExecDir->getClausesOfKind<OMPUseDeviceAddrClause>()) {
      for (const auto L : C->component_lists()) {
        assert(!std::get<1>(L).empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
        // Each declaration is handled only once even if listed repeatedly.
        if (!Processed.insert(VD).second)
          continue;
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
            return MI.Components.back().getAssociatedDeclaration() == VD;
          });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          if (CI != It->second.end()) {
            CI->ReturnDevicePointer = true;
            continue;
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
                  llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                  nullptr, /*ForDeviceAddr=*/true);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
        } else {
          // Unlike use_device_ptr, the address (not the pointee) is returned,
          // so no load is emitted for glvalues.
          llvm::Value *Ptr;
          if (IE->isGLValue())
            Ptr = CGF.EmitLValue(IE).getPointer(CGF);
          else
            Ptr = CGF.EmitScalarExpr(IE);
          CombinedInfo.BasePointers.emplace_back(Ptr, VD);
          CombinedInfo.Pointers.push_back(Ptr);
          CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
          CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
          CombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    // Emit the mapping information for each declaration's component lists.
    for (const auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = true;

      // Temporary generated information.
      MapCombinedInfoTy CurInfo;
      StructRangeInfoTy PartialStruct;

      for (const MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");

        // Remember the current base pointer index.
        unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
        generateInfoForComponentList(L.MapType, L.MapModifiers,
                                     L.MotionModifiers, L.Components, CurInfo,
                                     PartialStruct, IsFirstComponentList,
                                     L.IsImplicit, L.Mapper, L.ForDeviceAddr);

        // If this entry relates with a device pointer, set the relevant
        // declaration and add the 'return pointer' flag.
        if (L.ReturnDevicePointer) {
          assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
                 "Unexpected number of mapped base pointers.");

          const ValueDecl *RelevantVD =
              L.Components.back().getAssociatedDeclaration();
          assert(RelevantVD &&
                 "No relevant declaration related with device pointer??");

          CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
              RelevantVD);
          CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
        }
        IsFirstComponentList = false;
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr or use_device_addr.
      auto CI = DeferredInfo.find(M.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr;
          llvm::Value *Ptr;
          if (L.ForDeviceAddr) {
            if (L.IE->isGLValue())
              Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            else
              Ptr = this->CGF.EmitScalarExpr(L.IE);
            BasePtr = Ptr;
            // Entry is RETURN_PARAM. Also, set the placeholder value
            // MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
          } else {
            BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                             L.IE->getExprLoc());
            // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
            // value MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                                    OMP_MAP_MEMBER_OF);
          }
          CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
          CurInfo.Pointers.push_back(Ptr);
          CurInfo.Sizes.push_back(
              llvm::Constant::getNullValue(this->CGF.Int64Ty));
          CurInfo.Mappers.push_back(nullptr);
        }
      }

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct);

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Append data for use_device_ptr clauses.
    CombinedInfo.append(UseDevicePtrCombinedInfo);
  }
8331 
8332   /// Generate all the base pointers, section pointers, sizes, map types, and
8333   /// mappers for the extracted map clauses of user-defined mapper (all included
8334   /// in \a CombinedInfo).
8335   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
8336     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8337            "Expect a declare mapper directive");
8338     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8339     // We have to process the component lists that relate with the same
8340     // declaration in a single chunk so that we can generate the map flags
8341     // correctly. Therefore, we organize all lists in a map.
8342     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
8343 
8344     // Fill the information map for map clauses.
8345     for (const auto *C : CurMapperDir->clauselists()) {
8346       const auto *MC = cast<OMPMapClause>(C);
8347       for (const auto L : MC->component_lists()) {
8348         const ValueDecl *VD =
8349             std::get<0>(L) ? cast<ValueDecl>(std::get<0>(L)->getCanonicalDecl())
8350                            : nullptr;
8351         // Get the corresponding user-defined mapper.
8352         Info[VD].emplace_back(std::get<1>(L), MC->getMapType(),
8353                               MC->getMapTypeModifiers(), llvm::None,
8354                               /*ReturnDevicePointer=*/false, MC->isImplicit(),
8355                               std::get<2>(L));
8356       }
8357     }
8358 
8359     for (const auto &M : Info) {
8360       // We need to know when we generate information for the first component
8361       // associated with a capture, because the mapping flags depend on it.
8362       bool IsFirstComponentList = true;
8363 
8364       // Temporary generated information.
8365       MapCombinedInfoTy CurInfo;
8366       StructRangeInfoTy PartialStruct;
8367 
8368       for (const MapInfo &L : M.second) {
8369         assert(!L.Components.empty() &&
8370                "Not expecting declaration with no component lists.");
8371         generateInfoForComponentList(L.MapType, L.MapModifiers,
8372                                      L.MotionModifiers, L.Components, CurInfo,
8373                                      PartialStruct, IsFirstComponentList,
8374                                      L.IsImplicit, L.Mapper, L.ForDeviceAddr);
8375         IsFirstComponentList = false;
8376       }
8377 
8378       // If there is an entry in PartialStruct it means we have a struct with
8379       // individual members mapped. Emit an extra combined entry.
8380       if (PartialStruct.Base.isValid())
8381         emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct);
8382 
8383       // We need to append the results of this capture to what we already have.
8384       CombinedInfo.append(CurInfo);
8385     }
8386   }
8387 
8388   /// Emit capture info for lambdas for variables captured by reference.
8389   void generateInfoForLambdaCaptures(
8390       const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8391       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8392     const auto *RD = VD->getType()
8393                          .getCanonicalType()
8394                          .getNonReferenceType()
8395                          ->getAsCXXRecordDecl();
8396     if (!RD || !RD->isLambda())
8397       return;
8398     Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
8399     LValue VDLVal = CGF.MakeAddrLValue(
8400         VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
8401     llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
8402     FieldDecl *ThisCapture = nullptr;
8403     RD->getCaptureFields(Captures, ThisCapture);
8404     if (ThisCapture) {
8405       LValue ThisLVal =
8406           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8407       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8408       LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8409                                  VDLVal.getPointer(CGF));
8410       CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
8411       CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
8412       CombinedInfo.Sizes.push_back(
8413           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8414                                     CGF.Int64Ty, /*isSigned=*/true));
8415       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8416                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8417       CombinedInfo.Mappers.push_back(nullptr);
8418     }
8419     for (const LambdaCapture &LC : RD->captures()) {
8420       if (!LC.capturesVariable())
8421         continue;
8422       const VarDecl *VD = LC.getCapturedVar();
8423       if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8424         continue;
8425       auto It = Captures.find(VD);
8426       assert(It != Captures.end() && "Found lambda capture without field.");
8427       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8428       if (LC.getCaptureKind() == LCK_ByRef) {
8429         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8430         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8431                                    VDLVal.getPointer(CGF));
8432         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8433         CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
8434         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8435             CGF.getTypeSize(
8436                 VD->getType().getCanonicalType().getNonReferenceType()),
8437             CGF.Int64Ty, /*isSigned=*/true));
8438       } else {
8439         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8440         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8441                                    VDLVal.getPointer(CGF));
8442         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8443         CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
8444         CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8445       }
8446       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8447                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8448       CombinedInfo.Mappers.push_back(nullptr);
8449     }
8450   }
8451 
8452   /// Set correct indices for lambdas captures.
8453   void adjustMemberOfForLambdaCaptures(
8454       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8455       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8456       MapFlagsArrayTy &Types) const {
8457     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8458       // Set correct member_of idx for all implicit lambda captures.
8459       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8460                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8461         continue;
8462       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8463       assert(BasePtr && "Unable to find base lambda address.");
8464       int TgtIdx = -1;
8465       for (unsigned J = I; J > 0; --J) {
8466         unsigned Idx = J - 1;
8467         if (Pointers[Idx] != BasePtr)
8468           continue;
8469         TgtIdx = Idx;
8470         break;
8471       }
8472       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8473       // All other current entries will be MEMBER_OF the combined entry
8474       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8475       // 0xFFFF in the MEMBER_OF field).
8476       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8477       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8478     }
8479   }
8480 
8481   /// Generate the base pointers, section pointers, sizes, map types, and
8482   /// mappers associated to a given capture (all included in \a CombinedInfo).
8483   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
8484                               llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8485                               StructRangeInfoTy &PartialStruct) const {
8486     assert(!Cap->capturesVariableArrayType() &&
8487            "Not expecting to generate map info for a variable array type!");
8488 
8489     // We need to know when we generating information for the first component
8490     const ValueDecl *VD = Cap->capturesThis()
8491                               ? nullptr
8492                               : Cap->getCapturedVar()->getCanonicalDecl();
8493 
8494     // If this declaration appears in a is_device_ptr clause we just have to
8495     // pass the pointer by value. If it is a reference to a declaration, we just
8496     // pass its value.
8497     if (DevPointersMap.count(VD)) {
8498       CombinedInfo.BasePointers.emplace_back(Arg, VD);
8499       CombinedInfo.Pointers.push_back(Arg);
8500       CombinedInfo.Sizes.push_back(
8501           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8502                                     CGF.Int64Ty, /*isSigned=*/true));
8503       CombinedInfo.Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
8504       CombinedInfo.Mappers.push_back(nullptr);
8505       return;
8506     }
8507 
8508     using MapData =
8509         std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
8510                    OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
8511                    const ValueDecl *>;
8512     SmallVector<MapData, 4> DeclComponentLists;
8513     assert(CurDir.is<const OMPExecutableDirective *>() &&
8514            "Expect a executable directive");
8515     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8516     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8517       for (const auto L : C->decl_component_lists(VD)) {
8518         const ValueDecl *VDecl, *Mapper;
8519         OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8520         std::tie(VDecl, Components, Mapper) = L;
8521         assert(VDecl == VD && "We got information for the wrong declaration??");
8522         assert(!Components.empty() &&
8523                "Not expecting declaration with no component lists.");
8524         DeclComponentLists.emplace_back(Components, C->getMapType(),
8525                                         C->getMapTypeModifiers(),
8526                                         C->isImplicit(), Mapper);
8527       }
8528     }
8529 
8530     // Find overlapping elements (including the offset from the base element).
8531     llvm::SmallDenseMap<
8532         const MapData *,
8533         llvm::SmallVector<
8534             OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
8535         4>
8536         OverlappedData;
8537     size_t Count = 0;
8538     for (const MapData &L : DeclComponentLists) {
8539       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8540       OpenMPMapClauseKind MapType;
8541       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8542       bool IsImplicit;
8543       const ValueDecl *Mapper;
8544       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper) = L;
8545       ++Count;
8546       for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
8547         OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
8548         std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper) = L1;
8549         auto CI = Components.rbegin();
8550         auto CE = Components.rend();
8551         auto SI = Components1.rbegin();
8552         auto SE = Components1.rend();
8553         for (; CI != CE && SI != SE; ++CI, ++SI) {
8554           if (CI->getAssociatedExpression()->getStmtClass() !=
8555               SI->getAssociatedExpression()->getStmtClass())
8556             break;
8557           // Are we dealing with different variables/fields?
8558           if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
8559             break;
8560         }
8561         // Found overlapping if, at least for one component, reached the head of
8562         // the components list.
8563         if (CI == CE || SI == SE) {
8564           assert((CI != CE || SI != SE) &&
8565                  "Unexpected full match of the mapping components.");
8566           const MapData &BaseData = CI == CE ? L : L1;
8567           OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
8568               SI == SE ? Components : Components1;
8569           auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
8570           OverlappedElements.getSecond().push_back(SubData);
8571         }
8572       }
8573     }
8574     // Sort the overlapped elements for each item.
8575     llvm::SmallVector<const FieldDecl *, 4> Layout;
8576     if (!OverlappedData.empty()) {
8577       if (const auto *CRD =
8578               VD->getType().getCanonicalType()->getAsCXXRecordDecl())
8579         getPlainLayout(CRD, Layout, /*AsBase=*/false);
8580       else {
8581         const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
8582         Layout.append(RD->field_begin(), RD->field_end());
8583       }
8584     }
8585     for (auto &Pair : OverlappedData) {
8586       llvm::sort(
8587           Pair.getSecond(),
8588           [&Layout](
8589               OMPClauseMappableExprCommon::MappableExprComponentListRef First,
8590               OMPClauseMappableExprCommon::MappableExprComponentListRef
8591                   Second) {
8592             auto CI = First.rbegin();
8593             auto CE = First.rend();
8594             auto SI = Second.rbegin();
8595             auto SE = Second.rend();
8596             for (; CI != CE && SI != SE; ++CI, ++SI) {
8597               if (CI->getAssociatedExpression()->getStmtClass() !=
8598                   SI->getAssociatedExpression()->getStmtClass())
8599                 break;
8600               // Are we dealing with different variables/fields?
8601               if (CI->getAssociatedDeclaration() !=
8602                   SI->getAssociatedDeclaration())
8603                 break;
8604             }
8605 
8606             // Lists contain the same elements.
8607             if (CI == CE && SI == SE)
8608               return false;
8609 
8610             // List with less elements is less than list with more elements.
8611             if (CI == CE || SI == SE)
8612               return CI == CE;
8613 
8614             const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
8615             const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
8616             if (FD1->getParent() == FD2->getParent())
8617               return FD1->getFieldIndex() < FD2->getFieldIndex();
8618             const auto It =
8619                 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
8620                   return FD == FD1 || FD == FD2;
8621                 });
8622             return *It == FD1;
8623           });
8624     }
8625 
8626     // Associated with a capture, because the mapping flags depend on it.
8627     // Go through all of the elements with the overlapped elements.
8628     for (const auto &Pair : OverlappedData) {
8629       const MapData &L = *Pair.getFirst();
8630       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8631       OpenMPMapClauseKind MapType;
8632       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8633       bool IsImplicit;
8634       const ValueDecl *Mapper;
8635       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper) = L;
8636       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
8637           OverlappedComponents = Pair.getSecond();
8638       bool IsFirstComponentList = true;
8639       generateInfoForComponentList(
8640           MapType, MapModifiers, llvm::None, Components, CombinedInfo,
8641           PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
8642           /*ForDeviceAddr=*/false, OverlappedComponents);
8643     }
8644     // Go through other elements without overlapped elements.
8645     bool IsFirstComponentList = OverlappedData.empty();
8646     for (const MapData &L : DeclComponentLists) {
8647       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8648       OpenMPMapClauseKind MapType;
8649       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8650       bool IsImplicit;
8651       const ValueDecl *Mapper;
8652       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper) = L;
8653       auto It = OverlappedData.find(&L);
8654       if (It == OverlappedData.end())
8655         generateInfoForComponentList(MapType, MapModifiers, llvm::None,
8656                                      Components, CombinedInfo, PartialStruct,
8657                                      IsFirstComponentList, IsImplicit, Mapper);
8658       IsFirstComponentList = false;
8659     }
8660   }
8661 
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  /// Exactly one entry is appended to each of CombinedInfo's parallel arrays
  /// (BasePointers, Pointers, Sizes, Types, Mappers), and the entry is always
  /// marked as a target parameter. Three capture kinds are handled: 'this',
  /// by-copy captures, and by-reference captures.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    // Default maps are implicit unless a firstprivate entry for the captured
    // variable says otherwise (see FirstPrivateDecls lookups below).
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // 'this' capture: map the pointee object to/from the device. RI's type
      // is the pointer type of 'this'; the mapped size is the pointee's size.
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      // By-reference capture: RI's type is a reference; the mapped entity is
      // the referenced object, so size/alignment come from the pointee type.
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end() &&
          VD->getType().isConstant(CGF.getContext())) {
        // Constant firstprivate: register a global device copy once and
        // memcpy the host value into it, then map that global instead of the
        // original variable.
        llvm::Constant *Addr =
            CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
        // Copy the value of the original variable to the new global copy.
        CGF.Builder.CreateMemCpy(
            CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
            Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
            CombinedInfo.Sizes.back(), /*IsVolatile=*/false);
        // Use new global variable as the base pointers.
        CombinedInfo.BasePointers.push_back(Addr);
        CombinedInfo.Pointers.push_back(Addr);
      } else {
        CombinedInfo.BasePointers.push_back(CV);
        if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
          // Firstprivate pointer captured by reference: load the pointer
          // value itself so the runtime maps what it points to.
          Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
              CV, ElementType, CGF.getContext().getDeclAlign(VD),
              AlignmentSource::Decl));
          CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
        } else {
          CombinedInfo.Pointers.push_back(CV);
        }
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
8745 };
8746 } // anonymous namespace
8747 
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
///
/// \param CGF Code generation state for the current function.
/// \param CombinedInfo Parallel arrays of base pointers, pointers, sizes,
///        map types, and mappers collected for the region.
/// \param Info [out] Receives the emitted arrays and bookkeeping flags.
static void
emitOffloadingArrays(CodeGenFunction &CGF,
                     MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
                     CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : CombinedInfo.Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    // Base pointers, pointers, and mappers are always runtime values, so they
    // live in stack temporaries filled by the loop at the end.
    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      // At least one size is only known at runtime: use a stack temporary
      // filled per-element below.
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (llvm::Value *S : CombinedInfo.Sizes)
        ConstSizes.push_back(cast<llvm::Constant>(S));

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    llvm::Constant *MapTypesArrayInit =
        llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
        CGM.getModule(), MapTypesArrayInit->getType(),
        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
        MapTypesArrayInit, MaptypesName);
    MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
    Info.MapTypesArray = MapTypesArrayGbl;

    // If there's a present map type modifier, it must not be applied to the end
    // of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      // Strip OMP_MAP_PRESENT from every entry; only emit a second global if
      // that actually changed something.
      for (uint64_t &Type : Mapping) {
        if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
          Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
          EndMapTypesDiffer = true;
        }
      }
      if (EndMapTypesDiffer) {
        MapTypesArrayInit =
            llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
        MaptypesName = CGM.getOpenMPRuntime().getName({"offload_maptypes"});
        MapTypesArrayGbl = new llvm::GlobalVariable(
            CGM.getModule(), MapTypesArrayInit->getType(),
            /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
            MapTypesArrayInit, MaptypesName);
        MapTypesArrayGbl->setUnnamedAddr(
            llvm::GlobalValue::UnnamedAddr::Global);
        Info.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }

    // Fill the runtime-valued arrays: one store per captured entry into the
    // base-pointer, pointer, (optionally) size, and mapper temporaries.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Remember where a use_device_ptr-style declaration's address was
      // stored, if the caller asked for that mapping.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }
}
8903 
8904 /// Emit the arguments to be passed to the runtime library based on the
8905 /// arrays of base pointers, pointers, sizes, map types, and mappers.  If
8906 /// ForEndCall, emit map types to be passed for the end of the region instead of
8907 /// the beginning.
8908 static void emitOffloadingArraysArgument(
8909     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
8910     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
8911     llvm::Value *&MapTypesArrayArg, llvm::Value *&MappersArrayArg,
8912     CGOpenMPRuntime::TargetDataInfo &Info, bool ForEndCall = false) {
8913   assert((!ForEndCall || Info.separateBeginEndCalls()) &&
8914          "expected region end call to runtime only when end call is separate");
8915   CodeGenModule &CGM = CGF.CGM;
8916   if (Info.NumberOfPtrs) {
8917     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8918         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8919         Info.BasePointersArray,
8920         /*Idx0=*/0, /*Idx1=*/0);
8921     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8922         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8923         Info.PointersArray,
8924         /*Idx0=*/0,
8925         /*Idx1=*/0);
8926     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8927         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
8928         /*Idx0=*/0, /*Idx1=*/0);
8929     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8930         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8931         ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
8932                                             : Info.MapTypesArray,
8933         /*Idx0=*/0,
8934         /*Idx1=*/0);
8935     MappersArrayArg =
8936         Info.HasMapper
8937             ? CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy)
8938             : llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8939   } else {
8940     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8941     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8942     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8943     MapTypesArrayArg =
8944         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8945     MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8946   }
8947 }
8948 
8949 /// Check for inner distribute directive.
8950 static const OMPExecutableDirective *
8951 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
8952   const auto *CS = D.getInnermostCapturedStmt();
8953   const auto *Body =
8954       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
8955   const Stmt *ChildStmt =
8956       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8957 
8958   if (const auto *NestedDir =
8959           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8960     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
8961     switch (D.getDirectiveKind()) {
8962     case OMPD_target:
8963       if (isOpenMPDistributeDirective(DKind))
8964         return NestedDir;
8965       if (DKind == OMPD_teams) {
8966         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
8967             /*IgnoreCaptured=*/true);
8968         if (!Body)
8969           return nullptr;
8970         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8971         if (const auto *NND =
8972                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8973           DKind = NND->getDirectiveKind();
8974           if (isOpenMPDistributeDirective(DKind))
8975             return NND;
8976         }
8977       }
8978       return nullptr;
8979     case OMPD_target_teams:
8980       if (isOpenMPDistributeDirective(DKind))
8981         return NestedDir;
8982       return nullptr;
8983     case OMPD_target_parallel:
8984     case OMPD_target_simd:
8985     case OMPD_target_parallel_for:
8986     case OMPD_target_parallel_for_simd:
8987       return nullptr;
8988     case OMPD_target_teams_distribute:
8989     case OMPD_target_teams_distribute_simd:
8990     case OMPD_target_teams_distribute_parallel_for:
8991     case OMPD_target_teams_distribute_parallel_for_simd:
8992     case OMPD_parallel:
8993     case OMPD_for:
8994     case OMPD_parallel_for:
8995     case OMPD_parallel_master:
8996     case OMPD_parallel_sections:
8997     case OMPD_for_simd:
8998     case OMPD_parallel_for_simd:
8999     case OMPD_cancel:
9000     case OMPD_cancellation_point:
9001     case OMPD_ordered:
9002     case OMPD_threadprivate:
9003     case OMPD_allocate:
9004     case OMPD_task:
9005     case OMPD_simd:
9006     case OMPD_sections:
9007     case OMPD_section:
9008     case OMPD_single:
9009     case OMPD_master:
9010     case OMPD_critical:
9011     case OMPD_taskyield:
9012     case OMPD_barrier:
9013     case OMPD_taskwait:
9014     case OMPD_taskgroup:
9015     case OMPD_atomic:
9016     case OMPD_flush:
9017     case OMPD_depobj:
9018     case OMPD_scan:
9019     case OMPD_teams:
9020     case OMPD_target_data:
9021     case OMPD_target_exit_data:
9022     case OMPD_target_enter_data:
9023     case OMPD_distribute:
9024     case OMPD_distribute_simd:
9025     case OMPD_distribute_parallel_for:
9026     case OMPD_distribute_parallel_for_simd:
9027     case OMPD_teams_distribute:
9028     case OMPD_teams_distribute_simd:
9029     case OMPD_teams_distribute_parallel_for:
9030     case OMPD_teams_distribute_parallel_for_simd:
9031     case OMPD_target_update:
9032     case OMPD_declare_simd:
9033     case OMPD_declare_variant:
9034     case OMPD_begin_declare_variant:
9035     case OMPD_end_declare_variant:
9036     case OMPD_declare_target:
9037     case OMPD_end_declare_target:
9038     case OMPD_declare_reduction:
9039     case OMPD_declare_mapper:
9040     case OMPD_taskloop:
9041     case OMPD_taskloop_simd:
9042     case OMPD_master_taskloop:
9043     case OMPD_master_taskloop_simd:
9044     case OMPD_parallel_master_taskloop:
9045     case OMPD_parallel_master_taskloop_simd:
9046     case OMPD_requires:
9047     case OMPD_unknown:
9048     default:
9049       llvm_unreachable("Unexpected directive.");
9050     }
9051   }
9052 
9053   return nullptr;
9054 }
9055 
9056 /// Emit the user-defined mapper function. The code generation follows the
9057 /// pattern in the example below.
9058 /// \code
9059 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9060 ///                                           void *base, void *begin,
9061 ///                                           int64_t size, int64_t type) {
9062 ///   // Allocate space for an array section first.
9063 ///   if (size > 1 && !maptype.IsDelete)
9064 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9065 ///                                 size*sizeof(Ty), clearToFrom(type));
9066 ///   // Map members.
9067 ///   for (unsigned i = 0; i < size; i++) {
9068 ///     // For each component specified by this mapper:
9069 ///     for (auto c : all_components) {
9070 ///       if (c.hasMapper())
9071 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9072 ///                       c.arg_type);
9073 ///       else
9074 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9075 ///                                     c.arg_begin, c.arg_size, c.arg_type);
9076 ///     }
9077 ///   }
9078 ///   // Delete the array section.
9079 ///   if (size > 1 && maptype.IsDelete)
9080 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9081 ///                                 size*sizeof(Ty), clearToFrom(type));
9082 /// }
9083 /// \endcode
9084 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9085                                             CodeGenFunction *CGF) {
9086   if (UDMMap.count(D) > 0)
9087     return;
9088   ASTContext &C = CGM.getContext();
9089   QualType Ty = D->getType();
9090   QualType PtrTy = C.getPointerType(Ty).withRestrict();
9091   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
9092   auto *MapperVarDecl =
9093       cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9094   SourceLocation Loc = D->getLocation();
9095   CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9096 
9097   // Prepare mapper function arguments and attributes.
9098   ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9099                               C.VoidPtrTy, ImplicitParamDecl::Other);
9100   ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9101                             ImplicitParamDecl::Other);
9102   ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9103                              C.VoidPtrTy, ImplicitParamDecl::Other);
9104   ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9105                             ImplicitParamDecl::Other);
9106   ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9107                             ImplicitParamDecl::Other);
9108   FunctionArgList Args;
9109   Args.push_back(&HandleArg);
9110   Args.push_back(&BaseArg);
9111   Args.push_back(&BeginArg);
9112   Args.push_back(&SizeArg);
9113   Args.push_back(&TypeArg);
9114   const CGFunctionInfo &FnInfo =
9115       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
9116   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
9117   SmallString<64> TyStr;
9118   llvm::raw_svector_ostream Out(TyStr);
9119   CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
9120   std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9121   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
9122                                     Name, &CGM.getModule());
9123   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
9124   Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
9125   // Start the mapper function code generation.
9126   CodeGenFunction MapperCGF(CGM);
9127   MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
9128   // Compute the starting and end addreses of array elements.
9129   llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
9130       MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
9131       C.getPointerType(Int64Ty), Loc);
9132   // Convert the size in bytes into the number of array elements.
9133   Size = MapperCGF.Builder.CreateExactUDiv(
9134       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9135   llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
9136       MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
9137       CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
9138   llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
9139   llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
9140       MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
9141       C.getPointerType(Int64Ty), Loc);
9142   // Prepare common arguments for array initiation and deletion.
9143   llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
9144       MapperCGF.GetAddrOfLocalVar(&HandleArg),
9145       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9146   llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
9147       MapperCGF.GetAddrOfLocalVar(&BaseArg),
9148       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9149   llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
9150       MapperCGF.GetAddrOfLocalVar(&BeginArg),
9151       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9152 
9153   // Emit array initiation if this is an array section and \p MapType indicates
9154   // that memory allocation is required.
9155   llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
9156   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9157                              ElementSize, HeadBB, /*IsInit=*/true);
9158 
9159   // Emit a for loop to iterate through SizeArg of elements and map all of them.
9160 
9161   // Emit the loop header block.
9162   MapperCGF.EmitBlock(HeadBB);
9163   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
9164   llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
9165   // Evaluate whether the initial condition is satisfied.
9166   llvm::Value *IsEmpty =
9167       MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
9168   MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9169   llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
9170 
9171   // Emit the loop body block.
9172   MapperCGF.EmitBlock(BodyBB);
9173   llvm::BasicBlock *LastBB = BodyBB;
9174   llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
9175       PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
9176   PtrPHI->addIncoming(PtrBegin, EntryBB);
9177   Address PtrCurrent =
9178       Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
9179                           .getAlignment()
9180                           .alignmentOfArrayElement(ElementSize));
9181   // Privatize the declared variable of mapper to be the current array element.
9182   CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9183   Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
9184     return MapperCGF
9185         .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
9186         .getAddress(MapperCGF);
9187   });
9188   (void)Scope.Privatize();
9189 
9190   // Get map clause information. Fill up the arrays with all mapped variables.
9191   MappableExprsHandler::MapCombinedInfoTy Info;
9192   MappableExprsHandler MEHandler(*D, MapperCGF);
9193   MEHandler.generateAllInfoForMapper(Info);
9194 
9195   // Call the runtime API __tgt_mapper_num_components to get the number of
9196   // pre-existing components.
9197   llvm::Value *OffloadingArgs[] = {Handle};
9198   llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
9199       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9200                                             OMPRTL___tgt_mapper_num_components),
9201       OffloadingArgs);
9202   llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
9203       PreviousSize,
9204       MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
9205 
9206   // Fill up the runtime mapper handle for all components.
9207   for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
9208     llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
9209         *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9210     llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
9211         Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9212     llvm::Value *CurSizeArg = Info.Sizes[I];
9213 
9214     // Extract the MEMBER_OF field from the map type.
9215     llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
9216     MapperCGF.EmitBlock(MemberBB);
9217     llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
9218     llvm::Value *Member = MapperCGF.Builder.CreateAnd(
9219         OriMapType,
9220         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
9221     llvm::BasicBlock *MemberCombineBB =
9222         MapperCGF.createBasicBlock("omp.member.combine");
9223     llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
9224     llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
9225     MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
9226     // Add the number of pre-existing components to the MEMBER_OF field if it
9227     // is valid.
9228     MapperCGF.EmitBlock(MemberCombineBB);
9229     llvm::Value *CombinedMember =
9230         MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9231     // Do nothing if it is not a member of previous components.
9232     MapperCGF.EmitBlock(TypeBB);
9233     llvm::PHINode *MemberMapType =
9234         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
9235     MemberMapType->addIncoming(OriMapType, MemberBB);
9236     MemberMapType->addIncoming(CombinedMember, MemberCombineBB);
9237 
9238     // Combine the map type inherited from user-defined mapper with that
9239     // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
9240     // bits of the \a MapType, which is the input argument of the mapper
9241     // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
9242     // bits of MemberMapType.
9243     // [OpenMP 5.0], 1.2.6. map-type decay.
9244     //        | alloc |  to   | from  | tofrom | release | delete
9245     // ----------------------------------------------------------
9246     // alloc  | alloc | alloc | alloc | alloc  | release | delete
9247     // to     | alloc |  to   | alloc |   to   | release | delete
9248     // from   | alloc | alloc | from  |  from  | release | delete
9249     // tofrom | alloc |  to   | from  | tofrom | release | delete
9250     llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
9251         MapType,
9252         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
9253                                    MappableExprsHandler::OMP_MAP_FROM));
9254     llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
9255     llvm::BasicBlock *AllocElseBB =
9256         MapperCGF.createBasicBlock("omp.type.alloc.else");
9257     llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
9258     llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
9259     llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
9260     llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
9261     llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
9262     MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9263     // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
9264     MapperCGF.EmitBlock(AllocBB);
9265     llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
9266         MemberMapType,
9267         MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9268                                      MappableExprsHandler::OMP_MAP_FROM)));
9269     MapperCGF.Builder.CreateBr(EndBB);
9270     MapperCGF.EmitBlock(AllocElseBB);
9271     llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
9272         LeftToFrom,
9273         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
9274     MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9275     // In case of to, clear OMP_MAP_FROM.
9276     MapperCGF.EmitBlock(ToBB);
9277     llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
9278         MemberMapType,
9279         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
9280     MapperCGF.Builder.CreateBr(EndBB);
9281     MapperCGF.EmitBlock(ToElseBB);
9282     llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
9283         LeftToFrom,
9284         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
9285     MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9286     // In case of from, clear OMP_MAP_TO.
9287     MapperCGF.EmitBlock(FromBB);
9288     llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
9289         MemberMapType,
9290         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
9291     // In case of tofrom, do nothing.
9292     MapperCGF.EmitBlock(EndBB);
9293     LastBB = EndBB;
9294     llvm::PHINode *CurMapType =
9295         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
9296     CurMapType->addIncoming(AllocMapType, AllocBB);
9297     CurMapType->addIncoming(ToMapType, ToBB);
9298     CurMapType->addIncoming(FromMapType, FromBB);
9299     CurMapType->addIncoming(MemberMapType, ToElseBB);
9300 
9301     llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
9302                                      CurSizeArg, CurMapType};
9303     if (Info.Mappers[I]) {
9304       // Call the corresponding mapper function.
9305       llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
9306           cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
9307       assert(MapperFunc && "Expect a valid mapper function is available.");
9308       MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
9309     } else {
9310       // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9311       // data structure.
9312       MapperCGF.EmitRuntimeCall(
9313           OMPBuilder.getOrCreateRuntimeFunction(
9314               CGM.getModule(), OMPRTL___tgt_push_mapper_component),
9315           OffloadingArgs);
9316     }
9317   }
9318 
9319   // Update the pointer to point to the next element that needs to be mapped,
9320   // and check whether we have mapped all elements.
9321   llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
9322       PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
9323   PtrPHI->addIncoming(PtrNext, LastBB);
9324   llvm::Value *IsDone =
9325       MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9326   llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
9327   MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9328 
9329   MapperCGF.EmitBlock(ExitBB);
9330   // Emit array deletion if this is an array section and \p MapType indicates
9331   // that deletion is required.
9332   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9333                              ElementSize, DoneBB, /*IsInit=*/false);
9334 
9335   // Emit the function exit block.
9336   MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9337   MapperCGF.FinishFunction();
9338   UDMMap.try_emplace(D, Fn);
9339   if (CGF) {
9340     auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
9341     Decls.second.push_back(D);
9342   }
9343 }
9344 
9345 /// Emit the array initialization or deletion portion for user-defined mapper
9346 /// code generation. First, it evaluates whether an array section is mapped and
9347 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9348 /// true, and \a MapType indicates to not delete this array, array
9349 /// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
  // Basic-block name suffix distinguishing the initialization path from the
  // deletion path of the generated mapper code.
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *IsDeleteBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"}));
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  // Sections with fewer than one element need no init/delete handling; branch
  // straight to \p ExitBB in that case.
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);

  // Evaluate if we are going to delete this section.
  MapperCGF.EmitBlock(IsDeleteBB);
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  if (IsInit) {
    // Initialization code is only emitted when the DELETE bit is clear.
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    // Deletion code is only emitted when the DELETE bit is set.
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}
9400 
9401 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
9402     const OMPDeclareMapperDecl *D) {
9403   auto I = UDMMap.find(D);
9404   if (I != UDMMap.end())
9405     return I->second;
9406   emitUserDefinedMapper(D);
9407   return UDMMap.lookup(D);
9408 }
9409 
9410 void CGOpenMPRuntime::emitTargetNumIterationsCall(
9411     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9412     llvm::Value *DeviceID,
9413     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9414                                      const OMPLoopDirective &D)>
9415         SizeEmitter) {
9416   OpenMPDirectiveKind Kind = D.getDirectiveKind();
9417   const OMPExecutableDirective *TD = &D;
9418   // Get nested teams distribute kind directive, if any.
9419   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
9420     TD = getNestedDistributeDirective(CGM.getContext(), D);
9421   if (!TD)
9422     return;
9423   const auto *LD = cast<OMPLoopDirective>(TD);
9424   auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF,
9425                                                      PrePostActionTy &) {
9426     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
9427       llvm::Value *Args[] = {DeviceID, NumIterations};
9428       CGF.EmitRuntimeCall(
9429           OMPBuilder.getOrCreateRuntimeFunction(
9430               CGM.getModule(), OMPRTL___kmpc_push_target_tripcount),
9431           Args);
9432     }
9433   };
9434   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
9435 }
9436 
/// Emit the offloading call for a 'target' region: build the offloading
/// argument arrays, invoke the device runtime entry point, and fall back to
/// the host version of the region if offloading fails or is disabled.
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // A depend clause forces the target region to be wrapped in an outer task,
  // which changes how captured variables are regenerated below.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // InputInfo/MapTypesArray are filled in by TargetThenGen below and consumed
  // by ThenGen, which captures them by reference.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      if (RequiresOuterTask) {
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No device clause: let the runtime pick the default device.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       InputInfo.MappersArray.getPointer(),
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait
                                   ? OMPRTL___tgt_target_teams_nowait_mapper
                                   : OMPRTL___tgt_target_teams_mapper),
          OffloadingArgs);
    } else {
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       InputInfo.MappersArray.getPointer()};
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
                                         : OMPRTL___tgt_target_mapper),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      // Inside the outer task the captured variables must be regenerated in
      // the task's context before calling the host fallback.
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Builds the offloading arrays for all captures and map clauses, then
  // dispatches to ThenGen (directly or via a task if a depend clause exists).
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

    // Walk the captures and the captured-record fields in lockstep with the
    // values produced for them.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapCombinedInfoTy CurInfo;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurInfo.BasePointers.push_back(*CV);
        CurInfo.Pointers.push_back(*CV);
        CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                                MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                                MappableExprsHandler::OMP_MAP_IMPLICIT);
        CurInfo.Mappers.push_back(nullptr);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
        if (!CI->capturesThis())
          MappedVarSet.insert(CI->getCapturedVar());
        else
          MappedVarSet.insert(nullptr);
        if (CurInfo.BasePointers.empty())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                  CurInfo, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert(!CurInfo.BasePointers.empty() &&
             "Non-existing map pointer for capture!");
      assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
             CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
             CurInfo.BasePointers.size() == CurInfo.Types.size() &&
             CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct);

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(
        LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
        CombinedInfo.Types);
    // Map any list items in a map clause that were not captures because they
    // weren't referenced within the construct.
    MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info.MappersArray, Info);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
9728 
/// Recursively scan \p S for target-execution directives and emit the device
/// function for each one found, using \p ParentName to form unique entry
/// names.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    // The (device-id, file-id, line) triple uniquely identifies this target
    // region across translation units.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the device-function emitter for the specific target
    // combined-directive kind.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // All remaining directive kinds are not target-execution directives and
    // must not reach this switch (RequiresDeviceCodegen guards against them).
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    // A non-target directive: descend into its associated statement only.
    scanForTargetRegionsFunctions(
        E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
9877 
/// Returns true if the function associated with \p GD must be skipped here
/// (it is either handled elsewhere or must not be emitted for the current
/// host/device compilation at all); false if normal emission should proceed.
bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
      Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
          OMPDeclareTargetDeclAttr::getDeviceType(FD);
      // Do not emit device_type(nohost) functions for the host.
      if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
        return true;
    }
    return false;
  }

  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
    StringRef Name = CGM.getMangledName(GD);
    scanForTargetRegionsFunctions(FD->getBody(), Name);
    Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
        OMPDeclareTargetDeclAttr::getDeviceType(FD);
    // Do not emit device_type(host) functions for the device.
    if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
      return true;
  }

  // Do not emit the function if it is not marked as declare target and has
  // not already been emitted as part of target codegen.
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetDecls.count(VD) == 0;
}
9908 
9909 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9910   if (!CGM.getLangOpts().OpenMPIsDevice)
9911     return false;
9912 
9913   // Check if there are Ctors/Dtors in this declaration and look for target
9914   // regions in it. We use the complete variant to produce the kernel name
9915   // mangling.
9916   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9917   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9918     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9919       StringRef ParentName =
9920           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9921       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9922     }
9923     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9924       StringRef ParentName =
9925           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9926       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9927     }
9928   }
9929 
9930   // Do not to emit variable if it is not marked as declare target.
9931   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9932       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9933           cast<VarDecl>(GD.getDecl()));
9934   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9935       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9936        HasRequiresUnifiedSharedMemory)) {
9937     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9938     return true;
9939   }
9940   return false;
9941 }
9942 
9943 llvm::Constant *
9944 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
9945                                                 const VarDecl *VD) {
9946   assert(VD->getType().isConstant(CGM.getContext()) &&
9947          "Expected constant variable.");
9948   StringRef VarName;
9949   llvm::Constant *Addr;
9950   llvm::GlobalValue::LinkageTypes Linkage;
9951   QualType Ty = VD->getType();
9952   SmallString<128> Buffer;
9953   {
9954     unsigned DeviceID;
9955     unsigned FileID;
9956     unsigned Line;
9957     getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
9958                              FileID, Line);
9959     llvm::raw_svector_ostream OS(Buffer);
9960     OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
9961        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
9962     VarName = OS.str();
9963   }
9964   Linkage = llvm::GlobalValue::InternalLinkage;
9965   Addr =
9966       getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
9967                                   getDefaultFirstprivateAddressSpace());
9968   cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
9969   CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
9970   CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
9971   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9972       VarName, Addr, VarSize,
9973       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
9974   return Addr;
9975 }
9976 
9977 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
9978                                                    llvm::Constant *Addr) {
9979   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
9980       !CGM.getLangOpts().OpenMPIsDevice)
9981     return;
9982   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9983       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9984   if (!Res) {
9985     if (CGM.getLangOpts().OpenMPIsDevice) {
9986       // Register non-target variables being emitted in device code (debug info
9987       // may cause this).
9988       StringRef VarName = CGM.getMangledName(VD);
9989       EmittedNonTargetVariables.try_emplace(VarName, Addr);
9990     }
9991     return;
9992   }
9993   // Register declare target variables.
9994   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
9995   StringRef VarName;
9996   CharUnits VarSize;
9997   llvm::GlobalValue::LinkageTypes Linkage;
9998 
9999   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10000       !HasRequiresUnifiedSharedMemory) {
10001     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10002     VarName = CGM.getMangledName(VD);
10003     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
10004       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
10005       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
10006     } else {
10007       VarSize = CharUnits::Zero();
10008     }
10009     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
10010     // Temp solution to prevent optimizations of the internal variables.
10011     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
10012       std::string RefName = getName({VarName, "ref"});
10013       if (!CGM.GetGlobalValue(RefName)) {
10014         llvm::Constant *AddrRef =
10015             getOrCreateInternalVariable(Addr->getType(), RefName);
10016         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
10017         GVAddrRef->setConstant(/*Val=*/true);
10018         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
10019         GVAddrRef->setInitializer(Addr);
10020         CGM.addCompilerUsedGlobal(GVAddrRef);
10021       }
10022     }
10023   } else {
10024     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
10025             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10026              HasRequiresUnifiedSharedMemory)) &&
10027            "Declare target attribute must link or to with unified memory.");
10028     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
10029       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
10030     else
10031       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10032 
10033     if (CGM.getLangOpts().OpenMPIsDevice) {
10034       VarName = Addr->getName();
10035       Addr = nullptr;
10036     } else {
10037       VarName = getAddrOfDeclareTargetVar(VD).getName();
10038       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
10039     }
10040     VarSize = CGM.getPointerSize();
10041     Linkage = llvm::GlobalValue::WeakAnyLinkage;
10042   }
10043 
10044   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10045       VarName, Addr, VarSize, Flags, Linkage);
10046 }
10047 
10048 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10049   if (isa<FunctionDecl>(GD.getDecl()) ||
10050       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10051     return emitTargetFunctions(GD);
10052 
10053   return emitTargetGlobalVariable(GD);
10054 }
10055 
10056 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10057   for (const VarDecl *VD : DeferredGlobalVariables) {
10058     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10059         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10060     if (!Res)
10061       continue;
10062     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10063         !HasRequiresUnifiedSharedMemory) {
10064       CGM.EmitGlobal(VD);
10065     } else {
10066       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10067               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10068                HasRequiresUnifiedSharedMemory)) &&
10069              "Expected link clause or to clause with unified memory.");
10070       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10071     }
10072   }
10073 }
10074 
// Base implementation only checks the directive-kind invariant and performs
// no adjustment. Presumably overridden by device-specific runtimes that need
// to map lambda captures for target regions — TODO confirm against the GPU
// runtime subclass.
void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}
10080 
10081 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10082   for (const OMPClause *Clause : D->clauselists()) {
10083     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10084       HasRequiresUnifiedSharedMemory = true;
10085     } else if (const auto *AC =
10086                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10087       switch (AC->getAtomicDefaultMemOrderKind()) {
10088       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10089         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10090         break;
10091       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10092         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10093         break;
10094       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10095         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10096         break;
10097       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10098         break;
10099       }
10100     }
10101   }
10102 }
10103 
// Returns the default atomic ordering, as set by processRequiresDirective
// from an 'atomic_default_mem_order' clause (or the initial value otherwise).
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
10107 
10108 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10109                                                        LangAS &AS) {
10110   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10111     return false;
10112   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10113   switch(A->getAllocatorType()) {
10114   case OMPAllocateDeclAttr::OMPNullMemAlloc:
10115   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10116   // Not supported, fallback to the default mem space.
10117   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10118   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10119   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10120   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10121   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10122   case OMPAllocateDeclAttr::OMPConstMemAlloc:
10123   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10124     AS = LangAS::Default;
10125     return true;
10126   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10127     llvm_unreachable("Expected predefined allocator for the variables with the "
10128                      "static storage.");
10129   }
10130   return false;
10131 }
10132 
// True if a 'requires unified_shared_memory' clause was seen in this TU
// (recorded by processRequiresDirective).
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
10136 
10137 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10138     CodeGenModule &CGM)
10139     : CGM(CGM) {
10140   if (CGM.getLangOpts().OpenMPIsDevice) {
10141     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10142     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10143   }
10144 }
10145 
// Restores the ShouldMarkAsGlobal flag saved by the constructor (device
// compilations only; the host path never changed it).
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}
10150 
10151 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10152   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
10153     return true;
10154 
10155   const auto *D = cast<FunctionDecl>(GD.getDecl());
10156   // Do not to emit function if it is marked as declare target as it was already
10157   // emitted.
10158   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10159     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10160       if (auto *F = dyn_cast_or_null<llvm::Function>(
10161               CGM.GetGlobalValue(CGM.getMangledName(GD))))
10162         return !F->isDeclaration();
10163       return false;
10164     }
10165     return true;
10166   }
10167 
10168   return !AlreadyEmittedTargetDecls.insert(D).second;
10169 }
10170 
10171 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
10172   // If we don't have entries or if we are emitting code for the device, we
10173   // don't need to do anything.
10174   if (CGM.getLangOpts().OMPTargetTriples.empty() ||
10175       CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
10176       (OffloadEntriesInfoManager.empty() &&
10177        !HasEmittedDeclareTargetRegion &&
10178        !HasEmittedTargetRegion))
10179     return nullptr;
10180 
10181   // Create and register the function that handles the requires directives.
10182   ASTContext &C = CGM.getContext();
10183 
10184   llvm::Function *RequiresRegFn;
10185   {
10186     CodeGenFunction CGF(CGM);
10187     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
10188     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
10189     std::string ReqName = getName({"omp_offloading", "requires_reg"});
10190     RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
10191     CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
10192     OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
10193     // TODO: check for other requires clauses.
10194     // The requires directive takes effect only when a target region is
10195     // present in the compilation unit. Otherwise it is ignored and not
10196     // passed to the runtime. This avoids the runtime from throwing an error
10197     // for mismatching requires clauses across compilation units that don't
10198     // contain at least 1 target region.
10199     assert((HasEmittedTargetRegion ||
10200             HasEmittedDeclareTargetRegion ||
10201             !OffloadEntriesInfoManager.empty()) &&
10202            "Target or declare target region expected.");
10203     if (HasRequiresUnifiedSharedMemory)
10204       Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
10205     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10206                             CGM.getModule(), OMPRTL___tgt_register_requires),
10207                         llvm::ConstantInt::get(CGM.Int64Ty, Flags));
10208     CGF.FinishFunction();
10209   }
10210   return RequiresRegFn;
10211 }
10212 
10213 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10214                                     const OMPExecutableDirective &D,
10215                                     SourceLocation Loc,
10216                                     llvm::Function *OutlinedFn,
10217                                     ArrayRef<llvm::Value *> CapturedVars) {
10218   if (!CGF.HaveInsertPoint())
10219     return;
10220 
10221   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10222   CodeGenFunction::RunCleanupsScope Scope(CGF);
10223 
10224   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10225   llvm::Value *Args[] = {
10226       RTLoc,
10227       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10228       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10229   llvm::SmallVector<llvm::Value *, 16> RealArgs;
10230   RealArgs.append(std::begin(Args), std::end(Args));
10231   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10232 
10233   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10234       CGM.getModule(), OMPRTL___kmpc_fork_teams);
10235   CGF.EmitRuntimeCall(RTLFn, RealArgs);
10236 }
10237 
10238 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10239                                          const Expr *NumTeams,
10240                                          const Expr *ThreadLimit,
10241                                          SourceLocation Loc) {
10242   if (!CGF.HaveInsertPoint())
10243     return;
10244 
10245   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10246 
10247   llvm::Value *NumTeamsVal =
10248       NumTeams
10249           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10250                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10251           : CGF.Builder.getInt32(0);
10252 
10253   llvm::Value *ThreadLimitVal =
10254       ThreadLimit
10255           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10256                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10257           : CGF.Builder.getInt32(0);
10258 
10259   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
10260   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10261                                      ThreadLimitVal};
10262   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10263                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10264                       PushNumTeamsArgs);
10265 }
10266 
/// Emits the paired __tgt_target_data_begin_mapper/__tgt_target_data_end_mapper
/// calls for a 'target data' region, together with the region body, honoring
/// the optional if() and device() clauses. The body is duplicated when device
/// pointer privatization requires separate privatized/non-privatized variants.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MappersArrayArg, Info, /*ForEndCall=*/false);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,      PointerNum,       BasePointersArrayArg, PointersArrayArg,
        SizesArrayArg, MapTypesArrayArg, MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MappersArrayArg, Info, /*ForEndCall=*/true);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,      PointerNum,       BasePointersArrayArg, PointersArrayArg,
        SizesArrayArg, MapTypesArrayArg, MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  // Open the data environment, guarded by the if clause when one is present.
  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  // Close the data environment, again guarded by the if clause when present.
  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
10398 
/// Emits the single runtime call for a standalone data-movement directive
/// ('target enter data', 'target exit data', or 'target update'), selecting
/// the (possibly nowait) mapper runtime entry point and honoring the optional
/// if(), device(), and depend() clauses.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo,
                    &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    llvm::Value *OffloadingArgs[] = {DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    // All other directive kinds cannot reach this function (see the assert at
    // the top); they are listed exhaustively so new kinds trigger a compiler
    // warning here.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  // Build the offloading arrays first, then run ThenGen (possibly as a task
  // when depend clauses are present).
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info.MappersArray, Info);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (D.hasClausesOfKind<OMPDependClause>())
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // An if() clause that evaluates to false makes the directive a no-op.
  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
10563 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    /// Parameter kind; Vector (varying) unless a clause says otherwise.
    ParamKindTy Kind = Vector;
    /// Stride for linear parameters, or the referenced argument position for
    /// variable-stride linear parameters — TODO confirm against consumers.
    llvm::APSInt StrideOrArg;
    /// Alignment requested for the parameter (e.g. by an 'aligned' clause) —
    /// TODO confirm against consumers.
    llvm::APSInt Alignment;
  };
} // namespace
10574 
10575 static unsigned evaluateCDTSize(const FunctionDecl *FD,
10576                                 ArrayRef<ParamAttrTy> ParamAttrs) {
10577   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10578   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10579   // of that clause. The VLEN value must be power of 2.
10580   // In other case the notion of the function`s "characteristic data type" (CDT)
10581   // is used to compute the vector length.
10582   // CDT is defined in the following order:
10583   //   a) For non-void function, the CDT is the return type.
10584   //   b) If the function has any non-uniform, non-linear parameters, then the
10585   //   CDT is the type of the first such parameter.
10586   //   c) If the CDT determined by a) or b) above is struct, union, or class
10587   //   type which is pass-by-value (except for the type that maps to the
10588   //   built-in complex data type), the characteristic data type is int.
10589   //   d) If none of the above three cases is applicable, the CDT is int.
10590   // The VLEN is then determined based on the CDT and the size of vector
10591   // register of that ISA for which current vector version is generated. The
10592   // VLEN is computed using the formula below:
10593   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
10594   // where vector register size specified in section 3.2.1 Registers and the
10595   // Stack Frame of original AMD64 ABI document.
10596   QualType RetType = FD->getReturnType();
10597   if (RetType.isNull())
10598     return 0;
10599   ASTContext &C = FD->getASTContext();
10600   QualType CDT;
10601   if (!RetType.isNull() && !RetType->isVoidType()) {
10602     CDT = RetType;
10603   } else {
10604     unsigned Offset = 0;
10605     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10606       if (ParamAttrs[Offset].Kind == Vector)
10607         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10608       ++Offset;
10609     }
10610     if (CDT.isNull()) {
10611       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10612         if (ParamAttrs[I + Offset].Kind == Vector) {
10613           CDT = FD->getParamDecl(I)->getType();
10614           break;
10615         }
10616       }
10617     }
10618   }
10619   if (CDT.isNull())
10620     CDT = C.IntTy;
10621   CDT = CDT->getCanonicalTypeUnqualified();
10622   if (CDT->isRecordType() || CDT->isUnionType())
10623     CDT = C.IntTy;
10624   return C.getTypeSize(CDT);
10625 }
10626 
10627 static void
10628 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10629                            const llvm::APSInt &VLENVal,
10630                            ArrayRef<ParamAttrTy> ParamAttrs,
10631                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
10632   struct ISADataTy {
10633     char ISA;
10634     unsigned VecRegSize;
10635   };
10636   ISADataTy ISAData[] = {
10637       {
10638           'b', 128
10639       }, // SSE
10640       {
10641           'c', 256
10642       }, // AVX
10643       {
10644           'd', 256
10645       }, // AVX2
10646       {
10647           'e', 512
10648       }, // AVX512
10649   };
10650   llvm::SmallVector<char, 2> Masked;
10651   switch (State) {
10652   case OMPDeclareSimdDeclAttr::BS_Undefined:
10653     Masked.push_back('N');
10654     Masked.push_back('M');
10655     break;
10656   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10657     Masked.push_back('N');
10658     break;
10659   case OMPDeclareSimdDeclAttr::BS_Inbranch:
10660     Masked.push_back('M');
10661     break;
10662   }
10663   for (char Mask : Masked) {
10664     for (const ISADataTy &Data : ISAData) {
10665       SmallString<256> Buffer;
10666       llvm::raw_svector_ostream Out(Buffer);
10667       Out << "_ZGV" << Data.ISA << Mask;
10668       if (!VLENVal) {
10669         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10670         assert(NumElts && "Non-zero simdlen/cdtsize expected");
10671         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10672       } else {
10673         Out << VLENVal;
10674       }
10675       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
10676         switch (ParamAttr.Kind){
10677         case LinearWithVarStride:
10678           Out << 's' << ParamAttr.StrideOrArg;
10679           break;
10680         case Linear:
10681           Out << 'l';
10682           if (ParamAttr.StrideOrArg != 1)
10683             Out << ParamAttr.StrideOrArg;
10684           break;
10685         case Uniform:
10686           Out << 'u';
10687           break;
10688         case Vector:
10689           Out << 'v';
10690           break;
10691         }
10692         if (!!ParamAttr.Alignment)
10693           Out << 'a' << ParamAttr.Alignment;
10694       }
10695       Out << '_' << Fn->getName();
10696       Fn->addFnAttr(Out.str());
10697     }
10698   }
10699 }
10700 
// These are the functions that are needed to mangle the name of the
10702 // vector functions generated by the compiler, according to the rules
10703 // defined in the "Vector Function ABI specifications for AArch64",
10704 // available at
10705 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10706 
10707 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
10708 ///
10709 /// TODO: Need to implement the behavior for reference marked with a
10710 /// var or no linear modifiers (1.b in the section). For this, we
10711 /// need to extend ParamKindTy to support the linear modifiers.
10712 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10713   QT = QT.getCanonicalType();
10714 
10715   if (QT->isVoidType())
10716     return false;
10717 
10718   if (Kind == ParamKindTy::Uniform)
10719     return false;
10720 
10721   if (Kind == ParamKindTy::Linear)
10722     return false;
10723 
10724   // TODO: Handle linear references with modifiers
10725 
10726   if (Kind == ParamKindTy::LinearWithVarStride)
10727     return false;
10728 
10729   return true;
10730 }
10731 
10732 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10733 static bool getAArch64PBV(QualType QT, ASTContext &C) {
10734   QT = QT.getCanonicalType();
10735   unsigned Size = C.getTypeSize(QT);
10736 
10737   // Only scalars and complex within 16 bytes wide set PVB to true.
10738   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10739     return false;
10740 
10741   if (QT->isFloatingType())
10742     return true;
10743 
10744   if (QT->isIntegerType())
10745     return true;
10746 
10747   if (QT->isPointerType())
10748     return true;
10749 
10750   // TODO: Add support for complex types (section 3.1.2, item 2).
10751 
10752   return false;
10753 }
10754 
10755 /// Computes the lane size (LS) of a return type or of an input parameter,
10756 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10757 /// TODO: Add support for references, section 3.2.1, item 1.
10758 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10759   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10760     QualType PTy = QT.getCanonicalType()->getPointeeType();
10761     if (getAArch64PBV(PTy, C))
10762       return C.getTypeSize(PTy);
10763   }
10764   if (getAArch64PBV(QT, C))
10765     return C.getTypeSize(QT);
10766 
10767   return C.getTypeSize(C.getUIntPtrType());
10768 }
10769 
10770 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10771 // signature of the scalar function, as defined in 3.2.2 of the
10772 // AAVFABI.
10773 static std::tuple<unsigned, unsigned, bool>
10774 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10775   QualType RetType = FD->getReturnType().getCanonicalType();
10776 
10777   ASTContext &C = FD->getASTContext();
10778 
10779   bool OutputBecomesInput = false;
10780 
10781   llvm::SmallVector<unsigned, 8> Sizes;
10782   if (!RetType->isVoidType()) {
10783     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10784     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10785       OutputBecomesInput = true;
10786   }
10787   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10788     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10789     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10790   }
10791 
10792   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10793   // The LS of a function parameter / return value can only be a power
10794   // of 2, starting from 8 bits, up to 128.
10795   assert(std::all_of(Sizes.begin(), Sizes.end(),
10796                      [](unsigned Size) {
10797                        return Size == 8 || Size == 16 || Size == 32 ||
10798                               Size == 64 || Size == 128;
10799                      }) &&
10800          "Invalid size");
10801 
10802   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10803                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
10804                          OutputBecomesInput);
10805 }
10806 
10807 /// Mangle the parameter part of the vector function name according to
10808 /// their OpenMP classification. The mangling function is defined in
10809 /// section 3.5 of the AAVFABI.
10810 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10811   SmallString<256> Buffer;
10812   llvm::raw_svector_ostream Out(Buffer);
10813   for (const auto &ParamAttr : ParamAttrs) {
10814     switch (ParamAttr.Kind) {
10815     case LinearWithVarStride:
10816       Out << "ls" << ParamAttr.StrideOrArg;
10817       break;
10818     case Linear:
10819       Out << 'l';
10820       // Don't print the step value if it is not present or if it is
10821       // equal to 1.
10822       if (ParamAttr.StrideOrArg != 1)
10823         Out << ParamAttr.StrideOrArg;
10824       break;
10825     case Uniform:
10826       Out << 'u';
10827       break;
10828     case Vector:
10829       Out << 'v';
10830       break;
10831     }
10832 
10833     if (!!ParamAttr.Alignment)
10834       Out << 'a' << ParamAttr.Alignment;
10835   }
10836 
10837   return std::string(Out.str());
10838 }
10839 
10840 // Function used to add the attribute. The parameter `VLEN` is
10841 // templated to allow the use of "x" when targeting scalable functions
10842 // for SVE.
10843 template <typename T>
10844 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10845                                  char ISA, StringRef ParSeq,
10846                                  StringRef MangledName, bool OutputBecomesInput,
10847                                  llvm::Function *Fn) {
10848   SmallString<256> Buffer;
10849   llvm::raw_svector_ostream Out(Buffer);
10850   Out << Prefix << ISA << LMask << VLEN;
10851   if (OutputBecomesInput)
10852     Out << "v";
10853   Out << ParSeq << "_" << MangledName;
10854   Fn->addFnAttr(Out.str());
10855 }
10856 
10857 // Helper function to generate the Advanced SIMD names depending on
10858 // the value of the NDS when simdlen is not present.
10859 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10860                                       StringRef Prefix, char ISA,
10861                                       StringRef ParSeq, StringRef MangledName,
10862                                       bool OutputBecomesInput,
10863                                       llvm::Function *Fn) {
10864   switch (NDS) {
10865   case 8:
10866     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10867                          OutputBecomesInput, Fn);
10868     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10869                          OutputBecomesInput, Fn);
10870     break;
10871   case 16:
10872     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10873                          OutputBecomesInput, Fn);
10874     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10875                          OutputBecomesInput, Fn);
10876     break;
10877   case 32:
10878     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10879                          OutputBecomesInput, Fn);
10880     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10881                          OutputBecomesInput, Fn);
10882     break;
10883   case 64:
10884   case 128:
10885     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10886                          OutputBecomesInput, Fn);
10887     break;
10888   default:
10889     llvm_unreachable("Scalar type is too wide.");
10890   }
10891 }
10892 
/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
///
/// Adds one "_ZGV..." attribute per mandated vector variant for the given
/// ISA ('n' = Advanced SIMD, 's' = SVE) and branch state. Invalid simdlen
/// values are diagnosed with a warning and no attributes are added.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature: Narrowest/Widest Data
  // Size and whether the return value must also be mangled as an input.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1: the scalable variant is mangled with
      // "x" in place of a numeric VLEN.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
11001 
// Emits the vector-variant attributes for \p Fn by walking every
// OMPDeclareSimdDeclAttr on every redeclaration of \p FD. Parameter
// classification (uniform/linear/aligned) is computed per attribute and then
// lowered to target-specific mangled names (x86 or AArch64).
void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
                                              llvm::Function *Fn) {
  ASTContext &C = CGM.getContext();
  FD = FD->getMostRecentDecl();
  // Map params to their positions in function decl.
  llvm::DenseMap<const Decl *, unsigned> ParamPositions;
  // For methods, position 0 is reserved for the implicit 'this' parameter
  // (keyed by the method decl itself).
  if (isa<CXXMethodDecl>(FD))
    ParamPositions.try_emplace(FD, 0);
  unsigned ParamPos = ParamPositions.size();
  for (const ParmVarDecl *P : FD->parameters()) {
    ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
    ++ParamPos;
  }
  // Walk all redeclarations: each may carry its own 'declare simd' attributes.
  while (FD) {
    for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
      // One entry per parameter position; default Kind is Vector.
      llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
      // Mark uniform parameters.
      for (const Expr *E : Attr->uniforms()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
        }
        ParamAttrs[Pos].Kind = Uniform;
      }
      // Get alignment info.
      auto NI = Attr->alignments_begin();
      for (const Expr *E : Attr->aligneds()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        QualType ParmTy;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          ParmTy = E->getType();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
          ParmTy = PVD->getType();
        }
        // Use the explicit alignment expression when present; otherwise the
        // default SIMD alignment for the parameter's type, in bytes.
        ParamAttrs[Pos].Alignment =
            (*NI)
                ? (*NI)->EvaluateKnownConstInt(C)
                : llvm::APSInt::getUnsigned(
                      C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
                          .getQuantity());
        ++NI;
      }
      // Mark linear parameters.
      auto SI = Attr->steps_begin();
      auto MI = Attr->modifiers_begin();
      for (const Expr *E : Attr->linears()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        // Rescaling factor needed to compute the linear parameter
        // value in the mangled name.
        unsigned PtrRescalingFactor = 1;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
          // NOTE(review): dyn_cast on a QualType does not look through type
          // sugar, so a typedef'd pointer type presumably skips this
          // rescaling — confirm whether the canonical type should be used.
          if (auto *P = dyn_cast<PointerType>(PVD->getType()))
            PtrRescalingFactor = CGM.getContext()
                                     .getTypeSizeInChars(P->getPointeeType())
                                     .getQuantity();
        }
        ParamAttrTy &ParamAttr = ParamAttrs[Pos];
        ParamAttr.Kind = Linear;
        // Assuming a stride of 1, for `linear` without modifiers.
        ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
        if (*SI) {
          Expr::EvalResult Result;
          if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
            // Non-constant step: when it names another parameter, record that
            // parameter's position as a variable stride.
            if (const auto *DRE =
                    cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
              if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
                ParamAttr.Kind = LinearWithVarStride;
                ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
                    ParamPositions[StridePVD->getCanonicalDecl()]);
              }
            }
          } else {
            ParamAttr.StrideOrArg = Result.Val.getInt();
          }
        }
        // If we are using a linear clause on a pointer, we need to
        // rescale the value of linear_step with the byte size of the
        // pointee type.
        if (Linear == ParamAttr.Kind)
          ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
        ++SI;
        // Linear modifiers are iterated here but not yet consumed by the
        // mangling (see the TODO on getAArch64MTV).
        ++MI;
      }
      // Evaluate the constant simdlen value, if the clause is present.
      llvm::APSInt VLENVal;
      SourceLocation ExprLoc;
      const Expr *VLENExpr = Attr->getSimdlen();
      if (VLENExpr) {
        VLENVal = VLENExpr->EvaluateKnownConstInt(C);
        ExprLoc = VLENExpr->getExprLoc();
      }
      OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
      if (CGM.getTriple().isX86()) {
        emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
      } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
        unsigned VLEN = VLENVal.getExtValue();
        StringRef MangledName = Fn->getName();
        // Emit SVE and/or Advanced SIMD variants depending on the target
        // features available.
        if (CGM.getTarget().hasFeature("sve"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 's', 128, Fn, ExprLoc);
        if (CGM.getTarget().hasFeature("neon"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 'n', 128, Fn, ExprLoc);
      }
    }
    FD = FD->getPreviousDecl();
  }
}
11125 
11126 namespace {
11127 /// Cleanup action for doacross support.
11128 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11129 public:
11130   static const int DoacrossFinArgs = 2;
11131 
11132 private:
11133   llvm::FunctionCallee RTLFn;
11134   llvm::Value *Args[DoacrossFinArgs];
11135 
11136 public:
11137   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11138                     ArrayRef<llvm::Value *> CallArgs)
11139       : RTLFn(RTLFn) {
11140     assert(CallArgs.size() == DoacrossFinArgs);
11141     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11142   }
11143   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11144     if (!CGF.HaveInsertPoint())
11145       return;
11146     CGF.EmitRuntimeCall(RTLFn, Args);
11147   }
11148 };
11149 } // namespace
11150 
// Emits the __kmpc_doacross_init call for a loop directive, filling a stack
// array of kmp_dim descriptors (one per item of \p NumIterations) and
// registering a cleanup that calls __kmpc_doacross_fini on region exit.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // Lazily build and cache (in KmpDimTy) the kmp_dim record type.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  // Zero-initialize the whole array: 'lo' stays 0 for every dimension and
  // only 'up' and 'st' are written below.
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Push a cleanup so __kmpc_doacross_fini runs on both normal and EH exits.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
11221 
11222 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11223                                           const OMPDependClause *C) {
11224   QualType Int64Ty =
11225       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11226   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11227   QualType ArrayTy = CGM.getContext().getConstantArrayType(
11228       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
11229   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11230   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11231     const Expr *CounterVal = C->getLoopData(I);
11232     assert(CounterVal);
11233     llvm::Value *CntVal = CGF.EmitScalarConversion(
11234         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11235         CounterVal->getExprLoc());
11236     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11237                           /*Volatile=*/false, Int64Ty);
11238   }
11239   llvm::Value *Args[] = {
11240       emitUpdateLocation(CGF, C->getBeginLoc()),
11241       getThreadID(CGF, C->getBeginLoc()),
11242       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
11243   llvm::FunctionCallee RTLFn;
11244   if (C->getDependencyKind() == OMPC_DEPEND_source) {
11245     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11246                                                   OMPRTL___kmpc_doacross_post);
11247   } else {
11248     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
11249     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11250                                                   OMPRTL___kmpc_doacross_wait);
11251   }
11252   CGF.EmitRuntimeCall(RTLFn, Args);
11253 }
11254 
11255 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11256                                llvm::FunctionCallee Callee,
11257                                ArrayRef<llvm::Value *> Args) const {
11258   assert(Loc.isValid() && "Outlined function call location must be valid.");
11259   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11260 
11261   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11262     if (Fn->doesNotThrow()) {
11263       CGF.EmitNounwindRuntimeCall(Fn, Args);
11264       return;
11265     }
11266   }
11267   CGF.EmitRuntimeCall(Callee, Args);
11268 }
11269 
// Emits a call to an outlined OpenMP region function by delegating to
// emitCall, which applies the artificial debug location and the
// nounwind-call optimization uniformly.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
11275 
11276 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11277   if (const auto *FD = dyn_cast<FunctionDecl>(D))
11278     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11279       HasEmittedDeclareTargetRegion = true;
11280 }
11281 
// Host-side mapping from a "native" parameter to its address: the native
// parameter's own local storage is returned and TargetParam is unused here.
// NOTE(review): presumably device-specific runtimes override this to handle
// differing parameter representations — confirm against the class decl.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
11287 
11288 namespace {
11289 /// Cleanup action for allocate support.
11290 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
11291 public:
11292   static const int CleanupArgs = 3;
11293 
11294 private:
11295   llvm::FunctionCallee RTLFn;
11296   llvm::Value *Args[CleanupArgs];
11297 
11298 public:
11299   OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
11300                        ArrayRef<llvm::Value *> CallArgs)
11301       : RTLFn(RTLFn) {
11302     assert(CallArgs.size() == CleanupArgs &&
11303            "Size of arguments does not match.");
11304     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11305   }
11306   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11307     if (!CGF.HaveInsertPoint())
11308       return;
11309     CGF.EmitRuntimeCall(RTLFn, Args);
11310   }
11311 };
11312 } // namespace
11313 
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  // Returns runtime-allocated storage (__kmpc_alloc/__kmpc_free) for a
  // variable carrying an OMPAllocateDeclAttr with a non-default allocator.
  // Returns Address::invalid() to tell the caller to use ordinary storage.
  if (!VD)
    return Address::invalid();
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return Address::invalid();
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  if ((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
       AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
      !AA->getAllocator())
    return Address::invalid();
  llvm::Value *Size;
  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
  if (CVD->getType()->isVariablyModifiedType()) {
    // Variably modified (VLA-like) type: the size is only known at runtime,
    // so round it up to the declared alignment with emitted IR arithmetic.
    Size = CGF.getTypeSize(CVD->getType());
    // Align the size: ((size + align - 1) / align) * align
    Size = CGF.Builder.CreateNUWAdd(
        Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
  } else {
    // Constant-size type: the aligned size folds at compile time.
    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
    Size = CGM.getSize(Sz.alignTo(Align));
  }
  llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
  assert(AA->getAllocator() &&
         "Expected allocator expression for non-default allocator.");
  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
  // According to the standard, the original allocator type is an enum
  // (integer). Convert to pointer type, if required.
  if (Allocator->getType()->isIntegerTy())
    Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
  else if (Allocator->getType()->isPointerTy())
    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
                                                                CGM.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  // void *addr = __kmpc_alloc(tid, size, allocator);
  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, getName({CVD->getName(), ".void.addr"}));
  // Register a cleanup so __kmpc_free runs on normal exit and EH unwind.
  llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
                                                              Allocator};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_free);

  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                                llvm::makeArrayRef(FiniArgs));
  // Cast the raw void* result to a pointer to the variable's actual type.
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr,
      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
      getName({CVD->getName(), ".addr"}));
  return Address(Addr, Align);
}
11370 
CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  // Pushes one set containing every decl referenced by the directive's
  // 'nontemporal' clauses onto NontemporalDeclsStack; the destructor pops it.
  // Nothing is pushed when the directive has no such clause.
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        // Otherwise only a member of the current class is expected.
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}
11396 
11397 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11398   if (!NeedToPush)
11399     return;
11400   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11401 }
11402 
11403 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11404   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11405 
11406   return llvm::any_of(
11407       CGM.getOpenMPRuntime().NontemporalDeclsStack,
11408       [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
11409 }
11410 
11411 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11412     const OMPExecutableDirective &S,
11413     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11414     const {
11415   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11416   // Vars in target/task regions must be excluded completely.
11417   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11418       isOpenMPTaskingDirective(S.getDirectiveKind())) {
11419     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11420     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11421     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11422     for (const CapturedStmt::Capture &Cap : CS->captures()) {
11423       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11424         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11425     }
11426   }
11427   // Exclude vars in private clauses.
11428   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11429     for (const Expr *Ref : C->varlists()) {
11430       if (!Ref->getType()->isScalarType())
11431         continue;
11432       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11433       if (!DRE)
11434         continue;
11435       NeedToCheckForLPCs.insert(DRE->getDecl());
11436     }
11437   }
11438   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11439     for (const Expr *Ref : C->varlists()) {
11440       if (!Ref->getType()->isScalarType())
11441         continue;
11442       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11443       if (!DRE)
11444         continue;
11445       NeedToCheckForLPCs.insert(DRE->getDecl());
11446     }
11447   }
11448   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11449     for (const Expr *Ref : C->varlists()) {
11450       if (!Ref->getType()->isScalarType())
11451         continue;
11452       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11453       if (!DRE)
11454         continue;
11455       NeedToCheckForLPCs.insert(DRE->getDecl());
11456     }
11457   }
11458   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11459     for (const Expr *Ref : C->varlists()) {
11460       if (!Ref->getType()->isScalarType())
11461         continue;
11462       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11463       if (!DRE)
11464         continue;
11465       NeedToCheckForLPCs.insert(DRE->getDecl());
11466     }
11467   }
11468   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11469     for (const Expr *Ref : C->varlists()) {
11470       if (!Ref->getType()->isScalarType())
11471         continue;
11472       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11473       if (!DRE)
11474         continue;
11475       NeedToCheckForLPCs.insert(DRE->getDecl());
11476     }
11477   }
11478   for (const Decl *VD : NeedToCheckForLPCs) {
11479     for (const LastprivateConditionalData &Data :
11480          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
11481       if (Data.DeclToUniqueName.count(VD) > 0) {
11482         if (!Data.Disabled)
11483           NeedToAddForLPCsAsDisabled.insert(VD);
11484         break;
11485       }
11486     }
11487   }
11488 }
11489 
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  // Pushes one stack entry mapping each variable from the directive's
  // lastprivate(conditional) clauses to a unique global name ("pl_cond..."),
  // together with the iteration-variable lvalue and the current function.
  // Requires OpenMP >= 5.0 and at least one conditional lastprivate;
  // otherwise nothing is pushed (the destructor pops accordingly).
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    // Only 'conditional' lastprivates participate.
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}
11521 
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  // "Disable" form (reached through disable()): pushes an entry marked
  // Disabled for decls that must not be tracked by the lastprivate
  // conditional analysis inside region \p S, as computed by
  // tryToDisableInnerAnalysis. Pushes nothing when no decl qualifies.
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    // Unique names are irrelevant for disabled entries; store empty strings.
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}
11540 
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  // Factory for the (CGF, S) constructor, which only ever pushes a
  // "Disabled" entry (or nothing) — i.e. this suppresses the analysis for
  // region \p S rather than enabling it.
  return LastprivateConditionalRAII(CGF, S);
}
11546 
11547 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
11548   if (CGM.getLangOpts().OpenMP < 50)
11549     return;
11550   if (Action == ActionToDo::DisableLastprivateConditional) {
11551     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11552            "Expected list of disabled private vars.");
11553     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11554   }
11555   if (Action == ActionToDo::PushAsLastprivateConditional) {
11556     assert(
11557         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11558         "Expected list of lastprivate conditional vars.");
11559     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11560   }
11561 }
11562 
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  // Creates (or reuses) a per-function private struct { <VD's type>; char }
  // for lastprivate conditional variable \p VD, clears the char "Fired" flag
  // and returns the address of the value field.
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // First use of VD in this function: build the implicit record and a
    // memory temporary for it, and cache everything for later lookups.
    // NOTE(review): "lasprivate.conditional" looks like a typo for
    // "lastprivate.conditional"; the name is only cosmetic (implicit record
    // name), but worth confirming/fixing upstream.
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    // Cached from a previous call in the same function.
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // Fired = 0; — mark the variable as not yet updated.
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}
11597 
11598 namespace {
11599 /// Checks if the lastprivate conditional variable is referenced in LHS.
11600 class LastprivateConditionalRefChecker final
11601     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
11602   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
11603   const Expr *FoundE = nullptr;
11604   const Decl *FoundD = nullptr;
11605   StringRef UniqueDeclName;
11606   LValue IVLVal;
11607   llvm::Function *FoundFn = nullptr;
11608   SourceLocation Loc;
11609 
11610 public:
11611   bool VisitDeclRefExpr(const DeclRefExpr *E) {
11612     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11613          llvm::reverse(LPM)) {
11614       auto It = D.DeclToUniqueName.find(E->getDecl());
11615       if (It == D.DeclToUniqueName.end())
11616         continue;
11617       if (D.Disabled)
11618         return false;
11619       FoundE = E;
11620       FoundD = E->getDecl()->getCanonicalDecl();
11621       UniqueDeclName = It->second;
11622       IVLVal = D.IVLVal;
11623       FoundFn = D.Fn;
11624       break;
11625     }
11626     return FoundE == E;
11627   }
11628   bool VisitMemberExpr(const MemberExpr *E) {
11629     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
11630       return false;
11631     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11632          llvm::reverse(LPM)) {
11633       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
11634       if (It == D.DeclToUniqueName.end())
11635         continue;
11636       if (D.Disabled)
11637         return false;
11638       FoundE = E;
11639       FoundD = E->getMemberDecl()->getCanonicalDecl();
11640       UniqueDeclName = It->second;
11641       IVLVal = D.IVLVal;
11642       FoundFn = D.Fn;
11643       break;
11644     }
11645     return FoundE == E;
11646   }
11647   bool VisitStmt(const Stmt *S) {
11648     for (const Stmt *Child : S->children()) {
11649       if (!Child)
11650         continue;
11651       if (const auto *E = dyn_cast<Expr>(Child))
11652         if (!E->isGLValue())
11653           continue;
11654       if (Visit(Child))
11655         return true;
11656     }
11657     return false;
11658   }
11659   explicit LastprivateConditionalRefChecker(
11660       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
11661       : LPM(LPM) {}
11662   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
11663   getFoundData() const {
11664     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
11665   }
11666 };
11667 } // namespace
11668 
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Emits, inside a critical section (unless in simd-only mode):
  //   if (last_iv <= iv) { last_iv = iv; last_a = priv_a; }
  // where last_iv/last_a are internal globals derived from UniqueDeclName and
  // priv_a is the private copy designated by LVal.
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
11755 
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  // If \p LHS references a tracked lastprivate conditional variable, emit
  // the bookkeeping that records its update at this point.
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    // Reinterpret the private copy's address as the wrapper struct built by
    // emitLastprivateConditionalInit so the Fired flag can be reached.
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(CGF),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    // Atomic store: the flag may be set concurrently from inner parallel
    // regions.
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}
11798 
11799 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
11800     CodeGenFunction &CGF, const OMPExecutableDirective &D,
11801     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
11802   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11803     return;
11804   auto Range = llvm::reverse(LastprivateConditionalStack);
11805   auto It = llvm::find_if(
11806       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
11807   if (It == Range.end() || It->Fn != CGF.CurFn)
11808     return;
11809   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
11810   assert(LPCI != LastprivateConditionalToTypes.end() &&
11811          "Lastprivates must be registered already.");
11812   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11813   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
11814   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
11815   for (const auto &Pair : It->DeclToUniqueName) {
11816     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
11817     if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
11818       continue;
11819     auto I = LPCI->getSecond().find(Pair.first);
11820     assert(I != LPCI->getSecond().end() &&
11821            "Lastprivate must be rehistered already.");
11822     // bool Cmp = priv_a.Fired != 0;
11823     LValue BaseLVal = std::get<3>(I->getSecond());
11824     LValue FiredLVal =
11825         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
11826     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
11827     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
11828     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
11829     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
11830     // if (Cmp) {
11831     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
11832     CGF.EmitBlock(ThenBB);
11833     Address Addr = CGF.GetAddrOfLocalVar(VD);
11834     LValue LVal;
11835     if (VD->getType()->isReferenceType())
11836       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
11837                                            AlignmentSource::Decl);
11838     else
11839       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
11840                                 AlignmentSource::Decl);
11841     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
11842                                      D.getBeginLoc());
11843     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
11844     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
11845     // }
11846   }
11847 }
11848 
void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  // After the region, copy the tracked "last" value from the internal global
  // (created lazily by emitLastprivateConditionalUpdate) into the private
  // copy \p PrivLVal. Lastprivate conditional is OpenMP >= 5.0 only.
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeAddrLValue(
      GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}
11867 
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  // Unreachable: SIMD-only codegen never emits OpenMP runtime calls.
  llvm_unreachable("Not supported in SIMD-only mode");
}
11873 
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  // Unreachable: SIMD-only codegen never emits OpenMP runtime calls.
  llvm_unreachable("Not supported in SIMD-only mode");
}
11879 
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Unreachable: SIMD-only codegen never emits OpenMP runtime calls.
  llvm_unreachable("Not supported in SIMD-only mode");
}
11887 
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  // Unreachable: SIMD-only codegen never emits OpenMP runtime calls.
  llvm_unreachable("Not supported in SIMD-only mode");
}
11895 
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  // Unreachable: SIMD-only codegen never emits OpenMP runtime calls.
  llvm_unreachable("Not supported in SIMD-only mode");
}
11902 
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  // Unreachable: SIMD-only codegen never emits OpenMP runtime calls.
  llvm_unreachable("Not supported in SIMD-only mode");
}
11908 
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  // Unreachable: SIMD-only codegen never emits OpenMP runtime calls.
  llvm_unreachable("Not supported in SIMD-only mode");
}
11913 
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  // Unreachable: SIMD-only codegen never emits OpenMP runtime calls.
  llvm_unreachable("Not supported in SIMD-only mode");
}
11919 
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  // Unreachable: SIMD-only codegen never emits OpenMP runtime calls.
  llvm_unreachable("Not supported in SIMD-only mode");
}
11927 
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  // Unreachable: SIMD-only codegen never emits OpenMP runtime calls.
  llvm_unreachable("Not supported in SIMD-only mode");
}
11934 
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  // Unreachable: SIMD-only codegen never emits OpenMP runtime calls.
  llvm_unreachable("Not supported in SIMD-only mode");
}
11942 
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  // Unreachable: SIMD-only codegen never emits OpenMP runtime calls.
  llvm_unreachable("Not supported in SIMD-only mode");
}
11949 
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  // Unreachable: SIMD-only codegen never emits OpenMP runtime calls.
  llvm_unreachable("Not supported in SIMD-only mode");
}
11955 
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  // Unreachable: SIMD-only codegen never emits OpenMP runtime calls.
  llvm_unreachable("Not supported in SIMD-only mode");
}
11961 
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  // Unreachable: SIMD-only codegen never emits OpenMP runtime calls.
  llvm_unreachable("Not supported in SIMD-only mode");
}
11968 
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  // Unreachable: SIMD-only codegen never emits OpenMP runtime calls.
  llvm_unreachable("Not supported in SIMD-only mode");
}
11974 
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  // Unreachable: SIMD-only codegen never emits OpenMP runtime calls.
  llvm_unreachable("Not supported in SIMD-only mode");
}
11982 
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  // Unreachable: SIMD-only codegen never emits OpenMP runtime calls.
  llvm_unreachable("Not supported in SIMD-only mode");
}
11988 
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  // Unreachable: SIMD-only codegen never emits OpenMP runtime calls.
  llvm_unreachable("Not supported in SIMD-only mode");
}
11994 
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  // Unreachable: SIMD-only codegen never emits OpenMP runtime calls.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12001 
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  // Unreachable: SIMD-only codegen never emits OpenMP runtime calls.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12007 
// Artificial threadprivate storage requires the OpenMP runtime; intentionally
// unreachable in SIMD-only mode.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12012 
// 'flush' directive codegen requires the OpenMP runtime; intentionally
// unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12019 
// 'task' directive codegen requires the OpenMP tasking runtime; intentionally
// unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12028 
// 'taskloop' directive codegen requires the OpenMP tasking runtime;
// intentionally unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12035 
// Reductions in SIMD-only mode: only the "simple" form (no runtime
// involvement) is expected here, as enforced by the assert. Delegate to the
// base implementation, which handles the SimpleReduction case.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
12044 
// Task reduction initialization requires the OpenMP runtime; intentionally
// unreachable in SIMD-only mode.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12050 
// Task reduction finalization requires the OpenMP runtime; intentionally
// unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12056 
// Task reduction fixups require the OpenMP runtime; intentionally unreachable
// in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12063 
// Task reduction item lookup requires the OpenMP runtime; intentionally
// unreachable in SIMD-only mode.
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12070 
// 'taskwait' directive codegen requires the OpenMP runtime; intentionally
// unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12075 
// 'cancellation point' directive codegen requires the OpenMP runtime;
// intentionally unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12081 
// 'cancel' directive codegen requires the OpenMP runtime; intentionally
// unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12087 
// Target region outlining requires offloading support; intentionally
// unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12094 
// 'target' directive call codegen requires offloading support; intentionally
// unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12104 
// Device-side function emission requires offloading support; intentionally
// unreachable in SIMD-only mode.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12108 
// Device-side global-variable emission requires offloading support;
// intentionally unreachable in SIMD-only mode.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12112 
// SIMD-only mode never claims a global for device codegen: always return
// false so the declaration is handled by the regular host emission path.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
12116 
// 'teams' directive codegen requires the OpenMP runtime; intentionally
// unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12124 
// 'num_teams'/'thread_limit' clause codegen requires the OpenMP runtime;
// intentionally unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12131 
// 'target data' directive codegen requires offloading support; intentionally
// unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12137 
// Stand-alone target data directives (e.g. enter/exit data, update) require
// offloading support; intentionally unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12143 
// Doacross loop initialization requires the OpenMP runtime; intentionally
// unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12149 
// Doacross ordered-dependence codegen requires the OpenMP runtime;
// intentionally unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12154 
// Parameter translation is only needed for device codegen; intentionally
// unreachable in SIMD-only mode.
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12160 
// Translated-parameter address lookup is only needed for device codegen;
// intentionally unreachable in SIMD-only mode.
Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12167