1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/Attr.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/OpenMPClause.h"
21 #include "clang/AST/StmtOpenMP.h"
22 #include "clang/AST/StmtVisitor.h"
23 #include "clang/Basic/BitmaskEnum.h"
24 #include "clang/Basic/FileManager.h"
25 #include "clang/Basic/OpenMPKinds.h"
26 #include "clang/Basic/SourceManager.h"
27 #include "clang/CodeGen/ConstantInitBuilder.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/SetOperations.h"
30 #include "llvm/ADT/StringExtras.h"
31 #include "llvm/Bitcode/BitcodeReader.h"
32 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
33 #include "llvm/IR/Constants.h"
34 #include "llvm/IR/DerivedTypes.h"
35 #include "llvm/IR/GlobalValue.h"
36 #include "llvm/IR/Value.h"
37 #include "llvm/Support/AtomicOrdering.h"
38 #include "llvm/Support/Format.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include <cassert>
41 #include <numeric>
42 
43 using namespace clang;
44 using namespace CodeGen;
45 using namespace llvm::omp;
46 
47 namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Build region info bound to the captured statement \p CS.
  /// \param RegionKind Kind of the OpenMP region (see CGOpenMPRegionKind).
  /// \param CodeGen Callback used to emit the region body (stored by value).
  /// \param Kind OpenMP directive this region was created for.
  /// \param HasCancel true if the region may contain a 'cancel' directive.
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Build region info without a captured statement; used by inlined
  /// regions, which reuse the enclosing context's captures.
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Hook for untied-task switching points; a no-op for all region kinds
  /// except untied task regions, which override it.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  /// LLVM-style RTTI: any CGCapturedStmtInfo with kind CR_OpenMP is (a
  /// subclass of) CGOpenMPRegionInfo.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  /// Body-emission callback; note this member is a copy, not a reference.
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};
109 
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param ThreadIDVar Variable or parameter carrying the global thread id;
  ///        must be non-null (asserted below).
  /// \param HelperName Name for the outlined helper; stored as a non-owning
  ///        StringRef, so the caller keeps the underlying storage alive.
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Non-owning helper name (see constructor note).
  StringRef HelperName;
};
142 
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action implementing the part-id dispatch ("switching") logic
  /// for untied tasks. For tied tasks (Tied == true) every method is a no-op.
  class UntiedTaskActionTy final : public PrePostActionTy {
    /// True for untied tasks; note the constructor inverts its 'Tied' arg.
    bool Untied;
    /// Variable holding (a pointer to) the current part id of the task.
    const VarDecl *PartIDVar;
    /// Extra codegen run at each switching point before jumping out.
    const RegionCodeGenTy UntiedCodeGen;
    /// Switch over part ids; created lazily in Enter().
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        // Case 0 is the initial entry into the task body.
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit a resumption point: store the next part id, run the untied
    /// codegen, branch to the function exit, and register the continuation
    /// block as a new case of the switch.
    /// NOTE(review): dereferences UntiedSwitch unconditionally when Untied,
    /// so Enter() must have run first — confirm call ordering at call sites.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of parts (switch cases) generated so far.
    /// NOTE(review): only valid for untied tasks; UntiedSwitch is null
    /// otherwise.
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  /// \param ThreadIDVar Variable or parameter carrying the global thread id;
  ///        must be non-null (asserted below).
  /// \param Action Untied-task action; stored by reference, so it must
  ///        outlive this region info.
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  /// Delegate untied switching to the stored action.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
231 
/// API for inlined captured statement code generation in OpenMP
/// constructs. Most queries delegate to the enclosing (outer) region info
/// when one exists.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  /// \param OldCSI Captured-statement info active before this inlined region
  ///        was entered; may be null or a non-OpenMP info.
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    // NOTE: this local shadows the OuterRegionInfo member; it is the plain
    // CGCapturedStmtInfo from getOldCSI(), so a non-OpenMP outer info works
    // here as well.
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to CGOpenMPRegionInfo, or null when the outer info is
  /// absent or not an OpenMP region.
  CGOpenMPRegionInfo *OuterRegionInfo;
};
314 
/// API for captured statement code generation in OpenMP target
/// constructs. For this captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param HelperName Unique name for the target region; stored as a
  ///        non-owning StringRef, so the underlying storage must outlive
  ///        this object.
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Non-owning name of the target region (see constructor note).
  StringRef HelperName;
};
343 
/// Stub codegen callback for regions that must never emit a statement body
/// (expression-only captures); reaching it at runtime is a bug.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
347 /// API for generation of expressions captured in a innermost OpenMP
348 /// region.
349 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
350 public:
351   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
352       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
353                                   OMPD_unknown,
354                                   /*HasCancel=*/false),
355         PrivScope(CGF) {
356     // Make sure the globals captured in the provided statement are local by
357     // using the privatization logic. We assume the same variable is not
358     // captured more than once.
359     for (const auto &C : CS.captures()) {
360       if (!C.capturesVariable() && !C.capturesVariableByCopy())
361         continue;
362 
363       const VarDecl *VD = C.getCapturedVar();
364       if (VD->isLocalVarDeclOrParm())
365         continue;
366 
367       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
368                       /*RefersToEnclosingVariableOrCapture=*/false,
369                       VD->getType().getNonReferenceType(), VK_LValue,
370                       C.getLocation());
371       PrivScope.addPrivate(
372           VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
373     }
374     (void)PrivScope.Privatize();
375   }
376 
377   /// Lookup the captured field decl for a variable.
378   const FieldDecl *lookup(const VarDecl *VD) const override {
379     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
380       return FD;
381     return nullptr;
382   }
383 
384   /// Emit the captured statement body.
385   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
386     llvm_unreachable("No body for expressions");
387   }
388 
389   /// Get a variable or parameter for storing global thread id
390   /// inside OpenMP construct.
391   const VarDecl *getThreadIDVariable() const override {
392     llvm_unreachable("No thread id for expressions");
393   }
394 
395   /// Get the name of the capture helper.
396   StringRef getHelperName() const override {
397     llvm_unreachable("No helper name for expressions");
398   }
399 
400   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
401 
402 private:
403   /// Private scope to capture global variables.
404   CodeGenFunction::OMPPrivateScope PrivScope;
405 };
406 
407 /// RAII for emitting code of OpenMP constructs.
408 class InlinedOpenMPRegionRAII {
409   CodeGenFunction &CGF;
410   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
411   FieldDecl *LambdaThisCaptureField = nullptr;
412   const CodeGen::CGBlockInfo *BlockInfo = nullptr;
413 
414 public:
415   /// Constructs region for combined constructs.
416   /// \param CodeGen Code generation sequence for combined directives. Includes
417   /// a list of functions used for code generation of implicitly inlined
418   /// regions.
419   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
420                           OpenMPDirectiveKind Kind, bool HasCancel)
421       : CGF(CGF) {
422     // Start emission for the construct.
423     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
424         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
425     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
426     LambdaThisCaptureField = CGF.LambdaThisCaptureField;
427     CGF.LambdaThisCaptureField = nullptr;
428     BlockInfo = CGF.BlockInfo;
429     CGF.BlockInfo = nullptr;
430   }
431 
432   ~InlinedOpenMPRegionRAII() {
433     // Restore original CapturedStmtInfo only if we're done with code emission.
434     auto *OldCSI =
435         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
436     delete CGF.CapturedStmtInfo;
437     CGF.CapturedStmtInfo = OldCSI;
438     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
439     CGF.LambdaThisCaptureField = LambdaThisCaptureField;
440     CGF.BlockInfo = BlockInfo;
441   }
442 };
443 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumeric elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive (shares the value of
  /// OMP_IDENT_BARRIER_IMPL, matching kmp.h).
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
472 
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

/// Reserved device ids understood by the offloading runtime.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
498 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
539 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Modifier bits live in bits 29/30 and are OR'ed with a base schedule.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
571 
572 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
573 /// region.
574 class CleanupTy final : public EHScopeStack::Cleanup {
575   PrePostActionTy *Action;
576 
577 public:
578   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
579   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
580     if (!CGF.HaveInsertPoint())
581       return;
582     Action->Exit(CGF);
583   }
584 };
585 
586 } // anonymous namespace
587 
588 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
589   CodeGenFunction::RunCleanupsScope Scope(CGF);
590   if (PrePostAction) {
591     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
592     Callback(CodeGen, CGF, *PrePostAction);
593   } else {
594     PrePostActionTy Action;
595     Callback(CodeGen, CGF, Action);
596   }
597 }
598 
599 /// Check if the combiner is a call to UDR combiner and if it is so return the
600 /// UDR decl used for reduction.
601 static const OMPDeclareReductionDecl *
602 getReductionInit(const Expr *ReductionOp) {
603   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
604     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
605       if (const auto *DRE =
606               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
607         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
608           return DRD;
609   return nullptr;
610 }
611 
/// Initialize \p Private either with the user-defined reduction initializer
/// from \p DRD (when present) or with a zero constant of type \p Ty.
/// \param InitOp Initializer expression of the UDR; evaluated only when
///        DRD->getInitializer() is non-null.
/// \param Private Address of the private copy being initialized.
/// \param Original Address of the original (shared) variable.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    // Each call argument is a UnaryOperator wrapping a DeclRefExpr; unwrap
    // both to reach the UDR's placeholder decls.
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Map the lhs placeholder onto the private storage and the rhs
    // placeholder onto the original storage for the duration of the call.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // NOTE(review): Reduction.second is presumably the emitted initializer
    // function — confirm against getUserDefinedReduction's return pair.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No UDR initializer: materialize a private zeroed global of the type
    // and copy it into the private storage.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress(CGF));
      break;
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
663 
/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit true to initialize each element with the
///        declare-reduction initializer \p Init; false to use \p Init as a
///        plain initializer expression.
/// \param Init Initial expression of array.
/// \param DRD Declare-reduction declaration, or null; when non-null the
///        source array is walked in lockstep with the destination.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  // NOTE(review): casting DestAddr to its own element type looks like a
  // no-op — confirm whether this was meant to use the converted ElementTy.
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source/destination element across iterations.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the IR value name below says "dest" but this advances the
    // source element — cosmetic only, since IR names carry no semantics.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
752 
753 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
754   return CGF.EmitOMPSharedLValue(E);
755 }
756 
757 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
758                                             const Expr *E) {
759   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
760     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
761   return LValue();
762 }
763 
764 void ReductionCodeGen::emitAggregateInitialization(
765     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
766     const OMPDeclareReductionDecl *DRD) {
767   // Emit VarDecl with copy init for arrays.
768   // Get the address of the original variable captured in current
769   // captured region.
770   const auto *PrivateVD =
771       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
772   bool EmitDeclareReductionInit =
773       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
774   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
775                        EmitDeclareReductionInit,
776                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
777                                                 : PrivateVD->getInit(),
778                        DRD, SharedLVal.getAddress(CGF));
779 }
780 
781 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
782                                    ArrayRef<const Expr *> Origs,
783                                    ArrayRef<const Expr *> Privates,
784                                    ArrayRef<const Expr *> ReductionOps) {
785   ClausesData.reserve(Shareds.size());
786   SharedAddresses.reserve(Shareds.size());
787   Sizes.reserve(Shareds.size());
788   BaseDecls.reserve(Shareds.size());
789   const auto *IOrig = Origs.begin();
790   const auto *IPriv = Privates.begin();
791   const auto *IRed = ReductionOps.begin();
792   for (const Expr *Ref : Shareds) {
793     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
794     std::advance(IOrig, 1);
795     std::advance(IPriv, 1);
796     std::advance(IRed, 1);
797   }
798 }
799 
800 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
801   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
802          "Number of generated lvalues must be exactly N.");
803   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
804   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
805   SharedAddresses.emplace_back(First, Second);
806   if (ClausesData[N].Shared == ClausesData[N].Ref) {
807     OrigAddresses.emplace_back(First, Second);
808   } else {
809     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
810     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
811     OrigAddresses.emplace_back(First, Second);
812   }
813 }
814 
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  // Computes and records the size of reduction item N (bytes and, for
  // variably modified types, element count), then maps the VLA size
  // expression so the private type can be emitted.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-sized item: record the byte size only; no element count is
    // needed (nullptr).
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count = (UB - LB) + 1; byte size = count * sizeof(element).
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Otherwise derive the element count from the total byte size.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the computed element count to the VLA size expression so that
  // EmitVariablyModifiedType can evaluate the private type.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
851 
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  // Variant of emitAggregateType that reuses a previously computed element
  // count (Size) instead of recomputing it.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-sized items must not carry a size here or in Sizes[N].
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // Bind Size to the VLA size expression so the private type can be emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
870 
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  // Emits the initializer for private reduction item N: an aggregate init for
  // array types, a user-defined 'declare reduction' initializer when one
  // applies, or the private variable's own initializer otherwise.
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  // Re-form the shared lvalue with the shared item's type, base info and TBAA
  // so accesses through it are emitted correctly.
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Array item: element-wise initialization.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar with a user-defined reduction initializer.
    (void)DefaultInit(CGF);
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // Fall back to the private copy's own non-trivial initializer.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
904 
905 bool ReductionCodeGen::needCleanups(unsigned N) {
906   const auto *PrivateVD =
907       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
908   QualType PrivateType = PrivateVD->getType();
909   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
910   return DTorKind != QualType::DK_none;
911 }
912 
913 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
914                                     Address PrivateAddr) {
915   const auto *PrivateVD =
916       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
917   QualType PrivateType = PrivateVD->getType();
918   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
919   if (needCleanups(N)) {
920     PrivateAddr = CGF.Builder.CreateElementBitCast(
921         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
922     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
923   }
924 }
925 
926 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
927                           LValue BaseLV) {
928   BaseTy = BaseTy.getNonReferenceType();
929   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
930          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
931     if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
932       BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
933     } else {
934       LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
935       BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
936     }
937     BaseTy = BaseTy->getPointeeType();
938   }
939   return CGF.MakeAddrLValue(
940       CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
941                                        CGF.ConvertTypeForMem(ElTy)),
942       BaseLV.getType(), BaseLV.getBaseInfo(),
943       CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
944 }
945 
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  // Rebuilds the chain of indirections of BaseTy (pointers/references down to
  // ElTy) around Addr: a memory temporary is created per level, each one
  // storing the address of the next, and the outermost temporary is returned.
  // With no indirection, Addr is simply cast and wrapped in an Address.
  Address Tmp = Address::invalid();        // innermost temporary so far
  Address TopTmp = Address::invalid();     // previous level's temporary
  Address MostTopTmp = Address::invalid(); // outermost temporary (returned)
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    // Store the cast address into the innermost temporary; callers then reach
    // it by loading through the chain starting at the outermost one.
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
973 
974 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
975   const VarDecl *OrigVD = nullptr;
976   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
977     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
978     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
979       Base = TempOASE->getBase()->IgnoreParenImpCasts();
980     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
981       Base = TempASE->getBase()->IgnoreParenImpCasts();
982     DE = cast<DeclRefExpr>(Base);
983     OrigVD = cast<VarDecl>(DE->getDecl());
984   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
985     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
986     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
987       Base = TempASE->getBase()->IgnoreParenImpCasts();
988     DE = cast<DeclRefExpr>(Base);
989     OrigVD = cast<VarDecl>(DE->getDecl());
990   }
991   return OrigVD;
992 }
993 
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  // When the reduction item is an array section/subscript of some base
  // variable, shift the private address by the offset the shared item has
  // from that base, so the private copy can be addressed through the base
  // declaration as well. Otherwise the private address is used unchanged.
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    // Offset (in elements) of the base from the shared item's start.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress(CGF).getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    // Wrap the adjusted pointer back into the base's indirection chain.
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1019 
1020 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1021   const OMPDeclareReductionDecl *DRD =
1022       getReductionInit(ClausesData[N].ReductionOp);
1023   return DRD && DRD->getInitializer();
1024 }
1025 
1026 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1027   return CGF.EmitLoadOfPointerLValue(
1028       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1029       getThreadIDVariable()->getType()->castAs<PointerType>());
1030 }
1031 
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  // A terminate scope is pushed around the region body so an escaping
  // exception terminates instead of unwinding out of the structured block.
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1044 
1045 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1046     CodeGenFunction &CGF) {
1047   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1048                             getThreadIDVariable()->getType(),
1049                             AlignmentSource::Decl);
1050 }
1051 
1052 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1053                                        QualType FieldTy) {
1054   auto *Field = FieldDecl::Create(
1055       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1056       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1057       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1058   Field->setAccess(AS_public);
1059   DC->addDecl(Field);
1060   return Field;
1061 }
1062 
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  ASTContext &C = CGM.getContext();
  // Build the implicit ident_t record used for source-location arguments to
  // OpenMP runtime calls. NOTE(review): the field order presumably must
  // mirror the runtime's own ident_t layout — confirm against libomp.
  RecordDecl *RD = C.buildImplicitRecord("ident_t");
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  RD->startDefinition();
  // reserved_1
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // flags
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_2
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_3
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // psource
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  RD->completeDefinition();
  IdentQTy = C.getRecordType(RD);
  IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
  // Critical section names are arrays of 8 i32 values.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}
1090 
1091 void CGOpenMPRuntime::clear() {
1092   InternalVars.clear();
1093   // Clean non-target variable declarations possibly used only in debug info.
1094   for (const auto &Data : EmittedNonTargetVariables) {
1095     if (!Data.getValue().pointsToAliveValue())
1096       continue;
1097     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1098     if (!GV)
1099       continue;
1100     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1101       continue;
1102     GV->eraseFromParent();
1103   }
1104 }
1105 
1106 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1107   SmallString<128> Buffer;
1108   llvm::raw_svector_ostream OS(Buffer);
1109   StringRef Sep = FirstSeparator;
1110   for (StringRef Part : Parts) {
1111     OS << Sep << Part;
1112     Sep = Separator;
1113   }
1114   return std::string(OS.str());
1115 }
1116 
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // Emits the outlined combiner (or initializer) function for a user-defined
  // reduction. In/Out are the construct's omp_in/omp_out (or orig/priv)
  // variables; they are privatized to the pointees of the two parameters.
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // These helpers are tiny; force-inline them in optimized builds.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // For initializers, emit the out variable's own non-trivial initializer
  // first, then the combiner/initializer expression (if any).
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1173 
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  // Emits (once per declaration) the combiner and, when present, the
  // initializer function for a 'declare reduction' construct and caches the
  // pair in UDRMap.
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // The initializer expression is passed through only for call-style
    // initializers; otherwise the priv variable's own init is emitted.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    // Track which UDRs were emitted while this function was being generated.
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
1199 
1200 std::pair<llvm::Function *, llvm::Function *>
1201 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1202   auto I = UDRMap.find(D);
1203   if (I != UDRMap.end())
1204     return I->second;
1205   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1206   return UDRMap.lookup(D);
1207 }
1208 
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  // Pushes a finalization callback onto OMPBuilder (if non-null) that routes
  // cancellation through clang's cleanup machinery; the destructor pops it.
  // NOTE: the callback captures CGF by reference, so this object must not
  // outlive the CodeGenFunction.
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI(
        {FiniCB, OMPD_parallel, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder; // may be null; push/pop become no-ops
};
} // namespace
1254 
1255 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1256     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1257     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1258     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1259   assert(ThreadIDVar->getType()->isPointerType() &&
1260          "thread id variable must be of type kmp_int32 *");
1261   CodeGenFunction CGF(CGM, true);
1262   bool HasCancel = false;
1263   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1264     HasCancel = OPD->hasCancel();
1265   else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1266     HasCancel = OPD->hasCancel();
1267   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1268     HasCancel = OPSD->hasCancel();
1269   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1270     HasCancel = OPFD->hasCancel();
1271   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1272     HasCancel = OPFD->hasCancel();
1273   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1274     HasCancel = OPFD->hasCancel();
1275   else if (const auto *OPFD =
1276                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1277     HasCancel = OPFD->hasCancel();
1278   else if (const auto *OPFD =
1279                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1280     HasCancel = OPFD->hasCancel();
1281 
1282   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1283   //       parallel region to make cancellation barriers work properly.
1284   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1285   PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel);
1286   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1287                                     HasCancel, OutlinedHelperName);
1288   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1289   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1290 }
1291 
1292 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1293     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1294     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1295   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1296   return emitParallelOrTeamsOutlinedFunction(
1297       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1298 }
1299 
1300 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1301     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1302     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1303   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1304   return emitParallelOrTeamsOutlinedFunction(
1305       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1306 }
1307 
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // For untied tasks, emit a call to __kmpc_omp_task with the task descriptor
  // to re-enqueue the task after a part finishes.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  // Taskloop directives use their own captured region kind.
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // The number of parts is only meaningful (and computed) for untied tasks.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1354 
1355 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1356                              const RecordDecl *RD, const CGRecordLayout &RL,
1357                              ArrayRef<llvm::Constant *> Data) {
1358   llvm::StructType *StructTy = RL.getLLVMType();
1359   unsigned PrevIdx = 0;
1360   ConstantInitBuilder CIBuilder(CGM);
1361   auto DI = Data.begin();
1362   for (const FieldDecl *FD : RD->fields()) {
1363     unsigned Idx = RL.getLLVMFieldNo(FD);
1364     // Fill the alignment.
1365     for (unsigned I = PrevIdx; I < Idx; ++I)
1366       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1367     PrevIdx = Idx + 1;
1368     Fields.add(*DI);
1369     ++DI;
1370   }
1371 }
1372 
1373 template <class... As>
1374 static llvm::GlobalVariable *
1375 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1376                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1377                    As &&... Args) {
1378   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1379   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1380   ConstantInitBuilder CIBuilder(CGM);
1381   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1382   buildStructValue(Fields, CGM, RD, RL, Data);
1383   return Fields.finishAndCreateGlobal(
1384       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1385       std::forward<As>(Args)...);
1386 }
1387 
1388 template <typename T>
1389 static void
1390 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1391                                          ArrayRef<llvm::Constant *> Data,
1392                                          T &Parent) {
1393   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1394   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1395   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1396   buildStructValue(Fields, CGM, RD, RL, Data);
1397   Fields.finishAndAddTo(Parent);
1398 }
1399 
Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
  // Returns (creating on first use) the default ident_t global for the given
  // flag combination; results are cached in OpenMPDefaultLocMap.
  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  FlagsTy FlagsKey(Flags, Reserved2Flags);
  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
  if (!Entry) {
    if (!DefaultOpenMPPSource) {
      // Initialize default location for psource field of ident_t structure of
      // all ident_t objects. Format is ";file;function;line;column;;".
      // Taken from
      // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
      DefaultOpenMPPSource =
          CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
      DefaultOpenMPPSource =
          llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
    }

    // Field order matches the ident_t record built in the constructor:
    // reserved_1, flags, reserved_2, reserved_3, psource.
    llvm::Constant *Data[] = {
        llvm::ConstantInt::getNullValue(CGM.Int32Ty),
        llvm::ConstantInt::get(CGM.Int32Ty, Flags),
        llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
        llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
    llvm::GlobalValue *DefaultOpenMPLocation =
        createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
                           llvm::GlobalValue::PrivateLinkage);
    DefaultOpenMPLocation->setUnnamedAddr(
        llvm::GlobalValue::UnnamedAddr::Global);

    OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
  }
  return Address(Entry, Align);
}
1432 
1433 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1434                                              bool AtCurrentPoint) {
1435   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1436   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1437 
1438   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1439   if (AtCurrentPoint) {
1440     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1441         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1442   } else {
1443     Elem.second.ServiceInsertPt =
1444         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1445     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1446   }
1447 }
1448 
1449 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1450   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1451   if (Elem.second.ServiceInsertPt) {
1452     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1453     Elem.second.ServiceInsertPt = nullptr;
1454     Ptr->eraseFromParent();
1455   }
1456 }
1457 
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  // Every location emitted here is a KMPC location.
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  // Reuse the per-function ident_t temporary if one was created earlier.
  Address LocValue = Address::invalid();
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, Align);

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // GetOpenMPThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    // Copy the default ident_t into the temporary at the service insert
    // point so the copy dominates all uses in the function.
    if (!Elem.second.ServiceInsertPt)
      setLocThreadIdInsertPt(CGF);
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGF.getTypeSize(IdentQTy));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
  LValue PSource =
      CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));

  // Build (and cache, keyed by the raw source location) the
  // ";file;function;line;column;;" string for this location.
  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      OS2 << FD->getQualifiedNameAsString();
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}
1518 
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Loading the parameter is only done when either no C++ EH landing pad
      // is required, or the load happens in (or its pointer lives in) the
      // entry block or the current block; otherwise fall through to the
      // runtime call below.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the call at the service insert point so the cached value dominates
  // all later uses in the function.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1576 
1577 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1578   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1579   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1580     clearLocThreadIdInsertPt(CGF);
1581     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1582   }
1583   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1584     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1585       UDRMap.erase(D);
1586     FunctionUDRMap.erase(CGF.CurFn);
1587   }
1588   auto I = FunctionUDMMap.find(CGF.CurFn);
1589   if (I != FunctionUDMMap.end()) {
1590     for(const auto *D : I->second)
1591       UDMMap.erase(D);
1592     FunctionUDMMap.erase(I);
1593   }
1594   LastprivateConditionalToTypes.erase(CGF.CurFn);
1595 }
1596 
1597 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1598   return IdentTy->getPointerTo();
1599 }
1600 
1601 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1602   if (!Kmpc_MicroTy) {
1603     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1604     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1605                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1606     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1607   }
1608   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1609 }
1610 
1611 llvm::FunctionCallee
1612 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
1613   assert((IVSize == 32 || IVSize == 64) &&
1614          "IV size is not compatible with the omp runtime");
1615   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1616                                             : "__kmpc_for_static_init_4u")
1617                                 : (IVSigned ? "__kmpc_for_static_init_8"
1618                                             : "__kmpc_for_static_init_8u");
1619   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1620   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1621   llvm::Type *TypeParams[] = {
1622     getIdentTyPointerTy(),                     // loc
1623     CGM.Int32Ty,                               // tid
1624     CGM.Int32Ty,                               // schedtype
1625     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1626     PtrTy,                                     // p_lower
1627     PtrTy,                                     // p_upper
1628     PtrTy,                                     // p_stride
1629     ITy,                                       // incr
1630     ITy                                        // chunk
1631   };
1632   auto *FnTy =
1633       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1634   return CGM.CreateRuntimeFunction(FnTy, Name);
1635 }
1636 
1637 llvm::FunctionCallee
1638 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1639   assert((IVSize == 32 || IVSize == 64) &&
1640          "IV size is not compatible with the omp runtime");
1641   StringRef Name =
1642       IVSize == 32
1643           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1644           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1645   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1646   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1647                                CGM.Int32Ty,           // tid
1648                                CGM.Int32Ty,           // schedtype
1649                                ITy,                   // lower
1650                                ITy,                   // upper
1651                                ITy,                   // stride
1652                                ITy                    // chunk
1653   };
1654   auto *FnTy =
1655       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1656   return CGM.CreateRuntimeFunction(FnTy, Name);
1657 }
1658 
1659 llvm::FunctionCallee
1660 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1661   assert((IVSize == 32 || IVSize == 64) &&
1662          "IV size is not compatible with the omp runtime");
1663   StringRef Name =
1664       IVSize == 32
1665           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1666           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1667   llvm::Type *TypeParams[] = {
1668       getIdentTyPointerTy(), // loc
1669       CGM.Int32Ty,           // tid
1670   };
1671   auto *FnTy =
1672       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1673   return CGM.CreateRuntimeFunction(FnTy, Name);
1674 }
1675 
1676 llvm::FunctionCallee
1677 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1678   assert((IVSize == 32 || IVSize == 64) &&
1679          "IV size is not compatible with the omp runtime");
1680   StringRef Name =
1681       IVSize == 32
1682           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1683           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1684   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1685   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1686   llvm::Type *TypeParams[] = {
1687     getIdentTyPointerTy(),                     // loc
1688     CGM.Int32Ty,                               // tid
1689     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1690     PtrTy,                                     // p_lower
1691     PtrTy,                                     // p_upper
1692     PtrTy                                      // p_stride
1693   };
1694   auto *FnTy =
1695       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1696   return CGM.CreateRuntimeFunction(FnTy, Name);
1697 }
1698 
/// Obtain information that uniquely identifies a target entry. This
/// consists of the file and device IDs as well as line number associated with
/// the relevant entry source location.
static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
                                     unsigned &DeviceID, unsigned &FileID,
                                     unsigned &LineNum) {
  SourceManager &SM = C.getSourceManager();

  // The loc should be always valid and have a file ID (the user cannot use
  // #pragma directives in macros)

  assert(Loc.isValid() && "Source location is expected to be always valid.");

  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
  assert(PLoc.isValid() && "Source location is expected to be always valid.");

  // Use the filesystem UniqueID of the presumed file as the (device, file)
  // identifier. On failure a diagnostic is emitted and ID keeps its
  // default-constructed value.
  llvm::sys::fs::UniqueID ID;
  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
    SM.getDiagnostics().Report(diag::err_cannot_open_file)
        << PLoc.getFilename() << EC.message();

  DeviceID = ID.getDevice();
  FileID = ID.getFile();
  LineNum = PLoc.getLine();
}
1724 
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  // Simd-only mode: no offloading, so no reference pointer is needed.
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // A "_decl_tgt_ref_ptr" indirection is created only for 'link' variables,
  // or for 'to' variables when unified shared memory is required.
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      // Internal symbols get the file ID mixed into the name so equally-named
      // internal variables from different files do not collide.
      if (!VD->isExternallyVisible()) {
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    // Create the pointer variable on first use; later calls reuse the module
    // global of the same name.
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // Only the host side gets a static initializer pointing at the
      // original variable.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
1763 
1764 llvm::Constant *
1765 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1766   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1767          !CGM.getContext().getTargetInfo().isTLSSupported());
1768   // Lookup the entry, lazily creating it if necessary.
1769   std::string Suffix = getName({"cache", ""});
1770   return getOrCreateInternalVariable(
1771       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1772 }
1773 
1774 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1775                                                 const VarDecl *VD,
1776                                                 Address VDAddr,
1777                                                 SourceLocation Loc) {
1778   if (CGM.getLangOpts().OpenMPUseTLS &&
1779       CGM.getContext().getTargetInfo().isTLSSupported())
1780     return VDAddr;
1781 
1782   llvm::Type *VarTy = VDAddr.getElementType();
1783   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1784                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1785                                                        CGM.Int8PtrTy),
1786                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1787                          getOrCreateThreadPrivateCache(VD)};
1788   return Address(CGF.EmitRuntimeCall(
1789                      OMPBuilder.getOrCreateRuntimeFunction(
1790                          CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1791                      Args),
1792                  VDAddr.getAlignment());
1793 }
1794 
1795 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1796     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1797     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1798   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1799   // library.
1800   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1801   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1802                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1803                       OMPLoc);
1804   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1805   // to register constructor/destructor for variable.
1806   llvm::Value *Args[] = {
1807       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1808       Ctor, CopyCtor, Dtor};
1809   CGF.EmitRuntimeCall(
1810       OMPBuilder.getOrCreateRuntimeFunction(
1811           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1812       Args);
1813 }
1814 
/// Register the threadprivate variable \p VD with the OpenMP runtime,
/// synthesizing constructor/destructor wrapper functions as needed. When no
/// CodeGenFunction is supplied and registration is required, a standalone
/// initializer function is created and returned; otherwise returns nullptr.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // Nothing to register when the variable can simply live in native TLS.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  // Register each definition at most once (tracked by mangled name).
  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // The wrapper takes a single void* (the address of the thread-local
      // copy) and returns it.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      // Run the declaration's initializer on the incoming copy.
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Return the argument pointer unchanged.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // The wrapper takes a single void* (the address of the thread-local
      // copy) and returns nothing.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      // Typed null for an omitted constructor.
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      // Typed null for an omitted destructor.
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No CodeGenFunction supplied: wrap the registration call in a
      // standalone "__omp_threadprivate_init_" function and return it so the
      // caller can schedule it.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration inline into the supplied function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1934 
/// Emit (once per definition) the offload ctor/dtor entries for a declare
/// target variable: on the device, real initializer/destructor functions; on
/// the host, placeholder globals so the entries have stable addresses.
/// Returns true exactly when compiling for a device.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // No offload targets and not a device compile: nothing to do.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // 'link' variables (and 'to' variables under unified shared memory) are
  // handled through a reference pointer instead; no ctor/dtor entries here.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Handle each definition at most once (tracked by mangled name).
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Use an artificial debug location for the generated body.
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Mark the ctor as used so it is not optimized away.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // On the host only a private placeholder global is created so the
      // offload entry has an address to refer to.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Mark the dtor as used so it is not optimized away.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host-side placeholder global, mirroring the ctor case above.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2049 
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  // Back the artificial threadprivate with an internal global named after
  // \p Name plus the "artificial" suffix.
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  // With native TLS available, mark the global thread_local and use it
  // directly.
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  // Otherwise fetch this thread's copy from the runtime:
  // __kmpc_threadprivate_cached(loc, tid, &var, size, &cache).
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  // Cast the runtime's result pointer back to a pointer to the variable's
  // memory type.
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}
2080 
2081 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2082                                    const RegionCodeGenTy &ThenGen,
2083                                    const RegionCodeGenTy &ElseGen) {
2084   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2085 
2086   // If the condition constant folds and can be elided, try to avoid emitting
2087   // the condition and the dead arm of the if/else.
2088   bool CondConstant;
2089   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2090     if (CondConstant)
2091       ThenGen(CGF);
2092     else
2093       ElseGen(CGF);
2094     return;
2095   }
2096 
2097   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2098   // emit the conditional branch.
2099   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2100   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2101   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2102   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2103 
2104   // Emit the 'then' code.
2105   CGF.EmitBlock(ThenBlock);
2106   ThenGen(CGF);
2107   CGF.EmitBranch(ContBlock);
2108   // Emit the 'else' code if present.
2109   // There is no need to emit line number for unconditional branch.
2110   (void)ApplyDebugLocation::CreateEmpty(CGF);
2111   CGF.EmitBlock(ElseBlock);
2112   ElseGen(CGF);
2113   // There is no need to emit line number for unconditional branch.
2114   (void)ApplyDebugLocation::CreateEmpty(CGF);
2115   CGF.EmitBranch(ContBlock);
2116   // Emit the continuation block for code after the if.
2117   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2118 }
2119 
/// Emits the runtime call for a '#pragma omp parallel' region: either a
/// __kmpc_fork_call running \p OutlinedFn on a team of threads, or -- when
/// \p IfCond is present and evaluates to false -- a serialized execution of
/// \p OutlinedFn on the current thread bracketed by
/// __kmpc_serialized_parallel / __kmpc_end_serialized_parallel.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  // Parallel path: hand the outlined function and the captured variables to
  // the runtime, which invokes it on each thread of the team.
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // Serialized path: run the outlined function directly on this thread,
  // bracketed by the serialized-parallel runtime calls.
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  // With an 'if' clause, select between the two paths at runtime; otherwise
  // emit the parallel path unconditionally.
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2181 
2182 // If we're inside an (outlined) parallel region, use the region info's
2183 // thread-ID variable (it is passed in a first argument of the outlined function
2184 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2185 // regular serial code region, get thread ID by calling kmp_int32
2186 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2187 // return the address of that temp.
2188 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2189                                              SourceLocation Loc) {
2190   if (auto *OMPRegionInfo =
2191           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2192     if (OMPRegionInfo->getThreadIDVariable())
2193       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2194 
2195   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2196   QualType Int32Ty =
2197       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2198   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2199   CGF.EmitStoreOfScalar(ThreadID,
2200                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2201 
2202   return ThreadIDTemp;
2203 }
2204 
/// Gets (or creates and caches) a module-internal global variable with the
/// given type and name. Repeated requests for the same name return the same
/// global; the requested type must match on cache hits.
llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
    llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
  // Materialize the (possibly lazily concatenated) Twine into a flat string
  // usable as a map key.
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  StringRef RuntimeName = Out.str();
  // try_emplace either inserts a null placeholder or finds the existing
  // entry; a non-null mapped value means the variable was created before.
  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
  if (Elem.second) {
    assert(Elem.second->getType()->getPointerElementType() == Ty &&
           "OMP internal variable has different type than requested");
    return &*Elem.second;
  }

  // First request for this name: create a zero-initialized common-linkage
  // global keyed by the cached name (Elem.first() owns the string).
  return Elem.second = new llvm::GlobalVariable(
             CGM.getModule(), Ty, /*IsConstant*/ false,
             llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
             Elem.first(), /*InsertBefore=*/nullptr,
             llvm::GlobalValue::NotThreadLocal, AddressSpace);
}
2224 
2225 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2226   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2227   std::string Name = getName({Prefix, "var"});
2228   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2229 }
2230 
namespace {
/// Common pre(post)-action for different OpenMP constructs.
///
/// On Enter, emits a call to \p EnterCallee; when \p Conditional is set, the
/// enter call's result guards the region (skipping it when zero). On Exit,
/// emits a call to \p ExitCallee. For conditional actions, the caller must
/// invoke Done() after the region to close the guarding if-structure.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  // Continuation block; set only when Conditional is true.
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      // Only run the region when the enter call returned non-zero.
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  /// Closes the conditional structure opened by Enter(). Only valid (and
  /// required) when the action was constructed with Conditional=true;
  /// otherwise ContBlock is null.
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace
2269 
2270 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2271                                          StringRef CriticalName,
2272                                          const RegionCodeGenTy &CriticalOpGen,
2273                                          SourceLocation Loc, const Expr *Hint) {
2274   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2275   // CriticalOpGen();
2276   // __kmpc_end_critical(ident_t *, gtid, Lock);
2277   // Prepare arguments and build a call to __kmpc_critical
2278   if (!CGF.HaveInsertPoint())
2279     return;
2280   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2281                          getCriticalRegionLock(CriticalName)};
2282   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2283                                                 std::end(Args));
2284   if (Hint) {
2285     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2286         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2287   }
2288   CommonActionTy Action(
2289       OMPBuilder.getOrCreateRuntimeFunction(
2290           CGM.getModule(),
2291           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2292       EnterArgs,
2293       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2294                                             OMPRTL___kmpc_end_critical),
2295       Args);
2296   CriticalOpGen.setAction(Action);
2297   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2298 }
2299 
2300 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2301                                        const RegionCodeGenTy &MasterOpGen,
2302                                        SourceLocation Loc) {
2303   if (!CGF.HaveInsertPoint())
2304     return;
2305   // if(__kmpc_master(ident_t *, gtid)) {
2306   //   MasterOpGen();
2307   //   __kmpc_end_master(ident_t *, gtid);
2308   // }
2309   // Prepare arguments and build a call to __kmpc_master
2310   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2311   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2312                             CGM.getModule(), OMPRTL___kmpc_master),
2313                         Args,
2314                         OMPBuilder.getOrCreateRuntimeFunction(
2315                             CGM.getModule(), OMPRTL___kmpc_end_master),
2316                         Args,
2317                         /*Conditional=*/true);
2318   MasterOpGen.setAction(Action);
2319   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2320   Action.Done(CGF);
2321 }
2322 
2323 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2324                                         SourceLocation Loc) {
2325   if (!CGF.HaveInsertPoint())
2326     return;
2327   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2328     OMPBuilder.CreateTaskyield(CGF.Builder);
2329   } else {
2330     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2331     llvm::Value *Args[] = {
2332         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2333         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2334     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2335                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2336                         Args);
2337   }
2338 
2339   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2340     Region->emitUntiedSwitch(CGF);
2341 }
2342 
2343 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2344                                           const RegionCodeGenTy &TaskgroupOpGen,
2345                                           SourceLocation Loc) {
2346   if (!CGF.HaveInsertPoint())
2347     return;
2348   // __kmpc_taskgroup(ident_t *, gtid);
2349   // TaskgroupOpGen();
2350   // __kmpc_end_taskgroup(ident_t *, gtid);
2351   // Prepare arguments and build a call to __kmpc_taskgroup
2352   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2353   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2354                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2355                         Args,
2356                         OMPBuilder.getOrCreateRuntimeFunction(
2357                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2358                         Args);
2359   TaskgroupOpGen.setAction(Action);
2360   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2361 }
2362 
2363 /// Given an array of pointers to variables, project the address of a
2364 /// given variable.
2365 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2366                                       unsigned Index, const VarDecl *Var) {
2367   // Pull out the pointer to the variable.
2368   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2369   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2370 
2371   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2372   Addr = CGF.Builder.CreateElementBitCast(
2373       Addr, CGF.ConvertTypeForMem(Var->getType()));
2374   return Addr;
2375 }
2376 
/// Synthesizes the copy helper used by __kmpc_copyprivate:
///   void copy_func(void *LHSArg, void *RHSArg)
/// where both arguments are really pointers to arrays of void* (one slot per
/// copyprivate variable). For each variable it performs the user-visible
/// assignment DestExprs[I] = SrcExprs[I] via AssignmentOps[I].
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  // Emit the helper's body with a fresh CodeGenFunction.
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    // EmitOMPCopy handles scalar, aggregate, and array element copies using
    // the user-provided assignment expression.
    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
2430 
/// Emits a '#pragma omp single' region, including the copyprivate broadcast
/// when CopyprivateVars is non-empty. The four arrays run in parallel: for
/// each copyprivate variable there is a source expr, a destination expr, and
/// the assignment operation that copies between them.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // did_it records (on every thread) whether this thread executed the single
  // region; the runtime uses it to pick the broadcast source.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;  (still inside the guarded 'single' arm)
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  // Close the conditional structure opened by __kmpc_single.
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
2518 
2519 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2520                                         const RegionCodeGenTy &OrderedOpGen,
2521                                         SourceLocation Loc, bool IsThreads) {
2522   if (!CGF.HaveInsertPoint())
2523     return;
2524   // __kmpc_ordered(ident_t *, gtid);
2525   // OrderedOpGen();
2526   // __kmpc_end_ordered(ident_t *, gtid);
2527   // Prepare arguments and build a call to __kmpc_ordered
2528   if (IsThreads) {
2529     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2530     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2531                               CGM.getModule(), OMPRTL___kmpc_ordered),
2532                           Args,
2533                           OMPBuilder.getOrCreateRuntimeFunction(
2534                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2535                           Args);
2536     OrderedOpGen.setAction(Action);
2537     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2538     return;
2539   }
2540   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2541 }
2542 
2543 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2544   unsigned Flags;
2545   if (Kind == OMPD_for)
2546     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2547   else if (Kind == OMPD_sections)
2548     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2549   else if (Kind == OMPD_single)
2550     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2551   else if (Kind == OMPD_barrier)
2552     Flags = OMP_IDENT_BARRIER_EXPL;
2553   else
2554     Flags = OMP_IDENT_BARRIER_IMPL;
2555   return Flags;
2556 }
2557 
2558 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2559     CodeGenFunction &CGF, const OMPLoopDirective &S,
2560     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2561   // Check if the loop directive is actually a doacross loop directive. In this
2562   // case choose static, 1 schedule.
2563   if (llvm::any_of(
2564           S.getClausesOfKind<OMPOrderedClause>(),
2565           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2566     ScheduleKind = OMPC_SCHEDULE_static;
2567     // Chunk size is 1 in this case.
2568     llvm::APInt ChunkSize(32, 1);
2569     ChunkExpr = IntegerLiteral::Create(
2570         CGF.getContext(), ChunkSize,
2571         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2572         SourceLocation());
2573   }
2574 }
2575 
/// Emits a barrier for directive \p Kind. Inside a cancellable region (and
/// unless \p ForceSimpleCall) this becomes __kmpc_cancel_barrier, optionally
/// followed by a check that exits the construct when cancellation was
/// observed; otherwise a plain __kmpc_barrier is emitted.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.CreateBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  // Default: a plain, non-cancellable barrier.
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}
2625 
2626 /// Map the OpenMP loop schedule to the runtime enumeration.
2627 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2628                                           bool Chunked, bool Ordered) {
2629   switch (ScheduleKind) {
2630   case OMPC_SCHEDULE_static:
2631     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2632                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2633   case OMPC_SCHEDULE_dynamic:
2634     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2635   case OMPC_SCHEDULE_guided:
2636     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2637   case OMPC_SCHEDULE_runtime:
2638     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2639   case OMPC_SCHEDULE_auto:
2640     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2641   case OMPC_SCHEDULE_unknown:
2642     assert(!Chunked && "chunk was specified but schedule kind not known");
2643     return Ordered ? OMP_ord_static : OMP_sch_static;
2644   }
2645   llvm_unreachable("Unexpected runtime schedule");
2646 }
2647 
2648 /// Map the OpenMP distribute schedule to the runtime enumeration.
2649 static OpenMPSchedType
2650 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2651   // only static is allowed for dist_schedule
2652   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2653 }
2654 
2655 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2656                                          bool Chunked) const {
2657   OpenMPSchedType Schedule =
2658       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2659   return Schedule == OMP_sch_static;
2660 }
2661 
2662 bool CGOpenMPRuntime::isStaticNonchunked(
2663     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2664   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2665   return Schedule == OMP_dist_sch_static;
2666 }
2667 
2668 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2669                                       bool Chunked) const {
2670   OpenMPSchedType Schedule =
2671       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2672   return Schedule == OMP_sch_static_chunked;
2673 }
2674 
2675 bool CGOpenMPRuntime::isStaticChunked(
2676     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2677   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2678   return Schedule == OMP_dist_sch_static_chunked;
2679 }
2680 
2681 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2682   OpenMPSchedType Schedule =
2683       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2684   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2685   return Schedule != OMP_sch_static;
2686 }
2687 
2688 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2689                                   OpenMPScheduleClauseModifier M1,
2690                                   OpenMPScheduleClauseModifier M2) {
2691   int Modifier = 0;
2692   switch (M1) {
2693   case OMPC_SCHEDULE_MODIFIER_monotonic:
2694     Modifier = OMP_sch_modifier_monotonic;
2695     break;
2696   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2697     Modifier = OMP_sch_modifier_nonmonotonic;
2698     break;
2699   case OMPC_SCHEDULE_MODIFIER_simd:
2700     if (Schedule == OMP_sch_static_chunked)
2701       Schedule = OMP_sch_static_balanced_chunked;
2702     break;
2703   case OMPC_SCHEDULE_MODIFIER_last:
2704   case OMPC_SCHEDULE_MODIFIER_unknown:
2705     break;
2706   }
2707   switch (M2) {
2708   case OMPC_SCHEDULE_MODIFIER_monotonic:
2709     Modifier = OMP_sch_modifier_monotonic;
2710     break;
2711   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2712     Modifier = OMP_sch_modifier_nonmonotonic;
2713     break;
2714   case OMPC_SCHEDULE_MODIFIER_simd:
2715     if (Schedule == OMP_sch_static_chunked)
2716       Schedule = OMP_sch_static_balanced_chunked;
2717     break;
2718   case OMPC_SCHEDULE_MODIFIER_last:
2719   case OMPC_SCHEDULE_MODIFIER_unknown:
2720     break;
2721   }
2722   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2723   // If the static schedule kind is specified or if the ordered clause is
2724   // specified, and if the nonmonotonic modifier is not specified, the effect is
2725   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2726   // modifier is specified, the effect is as if the nonmonotonic modifier is
2727   // specified.
2728   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2729     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2730           Schedule == OMP_sch_static_balanced_chunked ||
2731           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2732           Schedule == OMP_dist_sch_static_chunked ||
2733           Schedule == OMP_dist_sch_static))
2734       Modifier = OMP_sch_modifier_nonmonotonic;
2735   }
2736   return Schedule | Modifier;
2737 }
2738 
2739 void CGOpenMPRuntime::emitForDispatchInit(
2740     CodeGenFunction &CGF, SourceLocation Loc,
2741     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2742     bool Ordered, const DispatchRTInput &DispatchValues) {
2743   if (!CGF.HaveInsertPoint())
2744     return;
2745   OpenMPSchedType Schedule = getRuntimeSchedule(
2746       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2747   assert(Ordered ||
2748          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2749           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2750           Schedule != OMP_sch_static_balanced_chunked));
2751   // Call __kmpc_dispatch_init(
2752   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2753   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2754   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2755 
2756   // If the Chunk was not specified in the clause - use default value 1.
2757   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2758                                             : CGF.Builder.getIntN(IVSize, 1);
2759   llvm::Value *Args[] = {
2760       emitUpdateLocation(CGF, Loc),
2761       getThreadID(CGF, Loc),
2762       CGF.Builder.getInt32(addMonoNonMonoModifier(
2763           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2764       DispatchValues.LB,                                     // Lower
2765       DispatchValues.UB,                                     // Upper
2766       CGF.Builder.getIntN(IVSize, 1),                        // Stride
2767       Chunk                                                  // Chunk
2768   };
2769   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2770 }
2771 
/// Emit the actual call to the __kmpc_for_static_init_* runtime entry point.
/// Shared by the worksharing-loop/sections path (emitForStaticInit) and the
/// distribute path (emitDistributeStaticInit), which pass in the already
/// computed location, thread id and runtime schedule constant.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  // Ordered static loops are handled on the dispatch-init path, never here.
  assert(!Values.Ordered);
  // Only static schedules (plain, chunked, balanced-chunked, ordered-static
  // and their dist_schedule counterparts) may reach this helper.
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}
2820 
2821 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2822                                         SourceLocation Loc,
2823                                         OpenMPDirectiveKind DKind,
2824                                         const OpenMPScheduleTy &ScheduleKind,
2825                                         const StaticRTInput &Values) {
2826   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2827       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2828   assert(isOpenMPWorksharingDirective(DKind) &&
2829          "Expected loop-based or sections-based directive.");
2830   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2831                                              isOpenMPLoopDirective(DKind)
2832                                                  ? OMP_IDENT_WORK_LOOP
2833                                                  : OMP_IDENT_WORK_SECTIONS);
2834   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2835   llvm::FunctionCallee StaticInitFunction =
2836       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2837   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2838   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2839                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2840 }
2841 
2842 void CGOpenMPRuntime::emitDistributeStaticInit(
2843     CodeGenFunction &CGF, SourceLocation Loc,
2844     OpenMPDistScheduleClauseKind SchedKind,
2845     const CGOpenMPRuntime::StaticRTInput &Values) {
2846   OpenMPSchedType ScheduleNum =
2847       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2848   llvm::Value *UpdatedLocation =
2849       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2850   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2851   llvm::FunctionCallee StaticInitFunction =
2852       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2853   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2854                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2855                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2856 }
2857 
2858 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2859                                           SourceLocation Loc,
2860                                           OpenMPDirectiveKind DKind) {
2861   if (!CGF.HaveInsertPoint())
2862     return;
2863   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2864   llvm::Value *Args[] = {
2865       emitUpdateLocation(CGF, Loc,
2866                          isOpenMPDistributeDirective(DKind)
2867                              ? OMP_IDENT_WORK_DISTRIBUTE
2868                              : isOpenMPLoopDirective(DKind)
2869                                    ? OMP_IDENT_WORK_LOOP
2870                                    : OMP_IDENT_WORK_SECTIONS),
2871       getThreadID(CGF, Loc)};
2872   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2873   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2874                           CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2875                       Args);
2876 }
2877 
2878 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2879                                                  SourceLocation Loc,
2880                                                  unsigned IVSize,
2881                                                  bool IVSigned) {
2882   if (!CGF.HaveInsertPoint())
2883     return;
2884   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2885   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2886   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2887 }
2888 
2889 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2890                                           SourceLocation Loc, unsigned IVSize,
2891                                           bool IVSigned, Address IL,
2892                                           Address LB, Address UB,
2893                                           Address ST) {
2894   // Call __kmpc_dispatch_next(
2895   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2896   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2897   //          kmp_int[32|64] *p_stride);
2898   llvm::Value *Args[] = {
2899       emitUpdateLocation(CGF, Loc),
2900       getThreadID(CGF, Loc),
2901       IL.getPointer(), // &isLastIter
2902       LB.getPointer(), // &Lower
2903       UB.getPointer(), // &Upper
2904       ST.getPointer()  // &Stride
2905   };
2906   llvm::Value *Call =
2907       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2908   return CGF.EmitScalarConversion(
2909       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2910       CGF.getContext().BoolTy, Loc);
2911 }
2912 
2913 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2914                                            llvm::Value *NumThreads,
2915                                            SourceLocation Loc) {
2916   if (!CGF.HaveInsertPoint())
2917     return;
2918   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2919   llvm::Value *Args[] = {
2920       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2921       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2922   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2923                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2924                       Args);
2925 }
2926 
2927 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2928                                          ProcBindKind ProcBind,
2929                                          SourceLocation Loc) {
2930   if (!CGF.HaveInsertPoint())
2931     return;
2932   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2933   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2934   llvm::Value *Args[] = {
2935       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2936       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2937   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2938                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2939                       Args);
2940 }
2941 
2942 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2943                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2944   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2945     OMPBuilder.CreateFlush(CGF.Builder);
2946   } else {
2947     if (!CGF.HaveInsertPoint())
2948       return;
2949     // Build call void __kmpc_flush(ident_t *loc)
2950     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2951                             CGM.getModule(), OMPRTL___kmpc_flush),
2952                         emitUpdateLocation(CGF, Loc));
2953   }
2954 }
2955 
namespace {
/// Indexes of fields for type kmp_task_t.
/// NOTE(review): the enumerator order must mirror the field order of the
/// kmp_task_t record built elsewhere in this file — do not reorder.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2981 
2982 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2983   return OffloadEntriesTargetRegion.empty() &&
2984          OffloadEntriesDeviceGlobalVar.empty();
2985 }
2986 
2987 /// Initialize target region entry.
2988 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2989     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2990                                     StringRef ParentName, unsigned LineNum,
2991                                     unsigned Order) {
2992   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
2993                                              "only required for the device "
2994                                              "code generation.");
2995   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
2996       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
2997                                    OMPTargetRegionEntryTargetRegion);
2998   ++OffloadingEntriesNum;
2999 }
3000 
/// Register a target region entry with its outlined function address and ID.
/// On the device side the entry must already exist (created while loading the
/// host IR metadata); on the host side a fresh entry is created.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // A missing placeholder means host and device IR disagree about which
    // target regions exist; report it instead of asserting.
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Unable to find target region on line '%0' in the device code.");
      CGM.getDiags().Report(DiagID) << LineNum;
      return;
    }
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    assert(Entry.isValid() && "Entry not initialized!");
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    // Host: create the entry with the next order number.
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}
3028 
3029 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3030     unsigned DeviceID, unsigned FileID, StringRef ParentName,
3031     unsigned LineNum) const {
3032   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3033   if (PerDevice == OffloadEntriesTargetRegion.end())
3034     return false;
3035   auto PerFile = PerDevice->second.find(FileID);
3036   if (PerFile == PerDevice->second.end())
3037     return false;
3038   auto PerParentName = PerFile->second.find(ParentName);
3039   if (PerParentName == PerFile->second.end())
3040     return false;
3041   auto PerLine = PerParentName->second.find(LineNum);
3042   if (PerLine == PerParentName->second.end())
3043     return false;
3044   // Fail if this entry is already registered.
3045   if (PerLine->second.getAddress() || PerLine->second.getID())
3046     return false;
3047   return true;
3048 }
3049 
3050 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3051     const OffloadTargetRegionEntryInfoActTy &Action) {
3052   // Scan all target region entries and perform the provided action.
3053   for (const auto &D : OffloadEntriesTargetRegion)
3054     for (const auto &F : D.second)
3055       for (const auto &P : F.second)
3056         for (const auto &L : P.second)
3057           Action(D.first, F.first, P.first(), L.first, L.second);
3058 }
3059 
3060 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3061     initializeDeviceGlobalVarEntryInfo(StringRef Name,
3062                                        OMPTargetGlobalVarEntryKind Flags,
3063                                        unsigned Order) {
3064   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3065                                              "only required for the device "
3066                                              "code generation.");
3067   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3068   ++OffloadingEntriesNum;
3069 }
3070 
/// Register a declare-target global variable with its address, size, entry
/// flags and linkage. On the device side a pre-initialized entry is updated;
/// on the host side the entry is created (or, if one already exists, only its
/// size/linkage are filled in when still unset).
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    assert(Entry.isValid() && Entry.getFlags() == Flags &&
           "Entry not initialized!");
    assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
           "Resetting with the new address.");
    // If the address is already known, only the size/linkage may still be
    // missing (e.g. the variable was registered before its definition).
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    // Host: re-registration only completes a previously size-less entry.
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
             "Resetting with the new address.");
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}
3110 
3111 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3112     actOnDeviceGlobalVarEntriesInfo(
3113         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3114   // Scan all target region entries and perform the provided action.
3115   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3116     Action(E.getKey(), E.getValue());
3117 }
3118 
3119 void CGOpenMPRuntime::createOffloadEntry(
3120     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3121     llvm::GlobalValue::LinkageTypes Linkage) {
3122   StringRef Name = Addr->getName();
3123   llvm::Module &M = CGM.getModule();
3124   llvm::LLVMContext &C = M.getContext();
3125 
3126   // Create constant string with the name.
3127   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3128 
3129   std::string StringName = getName({"omp_offloading", "entry_name"});
3130   auto *Str = new llvm::GlobalVariable(
3131       M, StrPtrInit->getType(), /*isConstant=*/true,
3132       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3133   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3134 
3135   llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
3136                             llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
3137                             llvm::ConstantInt::get(CGM.SizeTy, Size),
3138                             llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3139                             llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3140   std::string EntryName = getName({"omp_offloading", "entry", ""});
3141   llvm::GlobalVariable *Entry = createGlobalStruct(
3142       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3143       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3144 
3145   // The entry has to be created in the section the linker expects it to be.
3146   Entry->setSection("omp_offloading_entries");
3147 }
3148 
3149 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
3150   // Emit the offloading entries and metadata so that the device codegen side
3151   // can easily figure out what to emit. The produced metadata looks like
3152   // this:
3153   //
3154   // !omp_offload.info = !{!1, ...}
3155   //
3156   // Right now we only generate metadata for function that contain target
3157   // regions.
3158 
3159   // If we are in simd mode or there are no entries, we don't need to do
3160   // anything.
3161   if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
3162     return;
3163 
3164   llvm::Module &M = CGM.getModule();
3165   llvm::LLVMContext &C = M.getContext();
3166   SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
3167                          SourceLocation, StringRef>,
3168               16>
3169       OrderedEntries(OffloadEntriesInfoManager.size());
3170   llvm::SmallVector<StringRef, 16> ParentFunctions(
3171       OffloadEntriesInfoManager.size());
3172 
3173   // Auxiliary methods to create metadata values and strings.
3174   auto &&GetMDInt = [this](unsigned V) {
3175     return llvm::ConstantAsMetadata::get(
3176         llvm::ConstantInt::get(CGM.Int32Ty, V));
3177   };
3178 
3179   auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
3180 
3181   // Create the offloading info metadata node.
3182   llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
3183 
3184   // Create function that emits metadata for each target region entry;
3185   auto &&TargetRegionMetadataEmitter =
3186       [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
3187        &GetMDString](
3188           unsigned DeviceID, unsigned FileID, StringRef ParentName,
3189           unsigned Line,
3190           const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
3191         // Generate metadata for target regions. Each entry of this metadata
3192         // contains:
3193         // - Entry 0 -> Kind of this type of metadata (0).
3194         // - Entry 1 -> Device ID of the file where the entry was identified.
3195         // - Entry 2 -> File ID of the file where the entry was identified.
3196         // - Entry 3 -> Mangled name of the function where the entry was
3197         // identified.
3198         // - Entry 4 -> Line in the file where the entry was identified.
3199         // - Entry 5 -> Order the entry was created.
3200         // The first element of the metadata node is the kind.
3201         llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
3202                                  GetMDInt(FileID),      GetMDString(ParentName),
3203                                  GetMDInt(Line),        GetMDInt(E.getOrder())};
3204 
3205         SourceLocation Loc;
3206         for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
3207                   E = CGM.getContext().getSourceManager().fileinfo_end();
3208              I != E; ++I) {
3209           if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
3210               I->getFirst()->getUniqueID().getFile() == FileID) {
3211             Loc = CGM.getContext().getSourceManager().translateFileLineCol(
3212                 I->getFirst(), Line, 1);
3213             break;
3214           }
3215         }
3216         // Save this entry in the right position of the ordered entries array.
3217         OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
3218         ParentFunctions[E.getOrder()] = ParentName;
3219 
3220         // Add metadata to the named metadata node.
3221         MD->addOperand(llvm::MDNode::get(C, Ops));
3222       };
3223 
3224   OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
3225       TargetRegionMetadataEmitter);
3226 
3227   // Create function that emits metadata for each device global variable entry;
3228   auto &&DeviceGlobalVarMetadataEmitter =
3229       [&C, &OrderedEntries, &GetMDInt, &GetMDString,
3230        MD](StringRef MangledName,
3231            const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
3232                &E) {
3233         // Generate metadata for global variables. Each entry of this metadata
3234         // contains:
3235         // - Entry 0 -> Kind of this type of metadata (1).
3236         // - Entry 1 -> Mangled name of the variable.
3237         // - Entry 2 -> Declare target kind.
3238         // - Entry 3 -> Order the entry was created.
3239         // The first element of the metadata node is the kind.
3240         llvm::Metadata *Ops[] = {
3241             GetMDInt(E.getKind()), GetMDString(MangledName),
3242             GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
3243 
3244         // Save this entry in the right position of the ordered entries array.
3245         OrderedEntries[E.getOrder()] =
3246             std::make_tuple(&E, SourceLocation(), MangledName);
3247 
3248         // Add metadata to the named metadata node.
3249         MD->addOperand(llvm::MDNode::get(C, Ops));
3250       };
3251 
3252   OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
3253       DeviceGlobalVarMetadataEmitter);
3254 
3255   for (const auto &E : OrderedEntries) {
3256     assert(std::get<0>(E) && "All ordered entries must exist!");
3257     if (const auto *CE =
3258             dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
3259                 std::get<0>(E))) {
3260       if (!CE->getID() || !CE->getAddress()) {
3261         // Do not blame the entry if the parent funtion is not emitted.
3262         StringRef FnName = ParentFunctions[CE->getOrder()];
3263         if (!CGM.GetGlobalValue(FnName))
3264           continue;
3265         unsigned DiagID = CGM.getDiags().getCustomDiagID(
3266             DiagnosticsEngine::Error,
3267             "Offloading entry for target region in %0 is incorrect: either the "
3268             "address or the ID is invalid.");
3269         CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
3270         continue;
3271       }
3272       createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
3273                          CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
3274     } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
3275                                              OffloadEntryInfoDeviceGlobalVar>(
3276                    std::get<0>(E))) {
3277       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
3278           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3279               CE->getFlags());
3280       switch (Flags) {
3281       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
3282         if (CGM.getLangOpts().OpenMPIsDevice &&
3283             CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
3284           continue;
3285         if (!CE->getAddress()) {
3286           unsigned DiagID = CGM.getDiags().getCustomDiagID(
3287               DiagnosticsEngine::Error, "Offloading entry for declare target "
3288                                         "variable %0 is incorrect: the "
3289                                         "address is invalid.");
3290           CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
3291           continue;
3292         }
3293         // The vaiable has no definition - no need to add the entry.
3294         if (CE->getVarSize().isZero())
3295           continue;
3296         break;
3297       }
3298       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
3299         assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
3300                 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
3301                "Declaret target link address is set.");
3302         if (CGM.getLangOpts().OpenMPIsDevice)
3303           continue;
3304         if (!CE->getAddress()) {
3305           unsigned DiagID = CGM.getDiags().getCustomDiagID(
3306               DiagnosticsEngine::Error,
3307               "Offloading entry for declare target variable is incorrect: the "
3308               "address is invalid.");
3309           CGM.getDiags().Report(DiagID);
3310           continue;
3311         }
3312         break;
3313       }
3314       createOffloadEntry(CE->getAddress(), CE->getAddress(),
3315                          CE->getVarSize().getQuantity(), Flags,
3316                          CE->getLinkage());
3317     } else {
3318       llvm_unreachable("Unsupported entry kind.");
3319     }
3320   }
3321 }
3322 
/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  // Only the device-side compilation consumes host IR metadata.
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host IR into a throwaway context; only the named metadata is
  // read from the resulting module.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Helpers to decode the operands written by
    // createOffloadEntriesAndInfoMetadata().
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 is the entry kind; the remaining operand layout depends on it.
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
3391 
3392 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3393   if (!KmpRoutineEntryPtrTy) {
3394     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3395     ASTContext &C = CGM.getContext();
3396     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3397     FunctionProtoType::ExtProtoInfo EPI;
3398     KmpRoutineEntryPtrQTy = C.getPointerType(
3399         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3400     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3401   }
3402 }
3403 
3404 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3405   // Make sure the type of the entry is already created. This is the type we
3406   // have to create:
3407   // struct __tgt_offload_entry{
3408   //   void      *addr;       // Pointer to the offload entry info.
3409   //                          // (function or global)
3410   //   char      *name;       // Name of the function or global.
3411   //   size_t     size;       // Size of the entry info (0 if it a function).
3412   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3413   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3414   // };
3415   if (TgtOffloadEntryQTy.isNull()) {
3416     ASTContext &C = CGM.getContext();
3417     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3418     RD->startDefinition();
3419     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3420     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3421     addFieldToRecordDecl(C, RD, C.getSizeType());
3422     addFieldToRecordDecl(
3423         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3424     addFieldToRecordDecl(
3425         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3426     RD->completeDefinition();
3427     RD->addAttr(PackedAttr::CreateImplicit(C));
3428     TgtOffloadEntryQTy = C.getRecordType(RD);
3429   }
3430   return TgtOffloadEntryQTy;
3431 }
3432 
3433 namespace {
3434 struct PrivateHelpersTy {
3435   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3436                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3437       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3438         PrivateElemInit(PrivateElemInit) {}
3439   const Expr *OriginalRef = nullptr;
3440   const VarDecl *Original = nullptr;
3441   const VarDecl *PrivateCopy = nullptr;
3442   const VarDecl *PrivateElemInit = nullptr;
3443 };
3444 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3445 } // anonymous namespace
3446 
3447 static RecordDecl *
3448 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3449   if (!Privates.empty()) {
3450     ASTContext &C = CGM.getContext();
3451     // Build struct .kmp_privates_t. {
3452     //         /*  private vars  */
3453     //       };
3454     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3455     RD->startDefinition();
3456     for (const auto &Pair : Privates) {
3457       const VarDecl *VD = Pair.second.Original;
3458       QualType Type = VD->getType().getNonReferenceType();
3459       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3460       if (VD->hasAttrs()) {
3461         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3462              E(VD->getAttrs().end());
3463              I != E; ++I)
3464           FD->addAttr(*I);
3465       }
3466     }
3467     RD->completeDefinition();
3468     return RD;
3469   }
3470   return nullptr;
3471 }
3472 
3473 static RecordDecl *
3474 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3475                          QualType KmpInt32Ty,
3476                          QualType KmpRoutineEntryPointerQTy) {
3477   ASTContext &C = CGM.getContext();
3478   // Build struct kmp_task_t {
3479   //         void *              shareds;
3480   //         kmp_routine_entry_t routine;
3481   //         kmp_int32           part_id;
3482   //         kmp_cmplrdata_t data1;
3483   //         kmp_cmplrdata_t data2;
3484   // For taskloops additional fields:
3485   //         kmp_uint64          lb;
3486   //         kmp_uint64          ub;
3487   //         kmp_int64           st;
3488   //         kmp_int32           liter;
3489   //         void *              reductions;
3490   //       };
3491   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3492   UD->startDefinition();
3493   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3494   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3495   UD->completeDefinition();
3496   QualType KmpCmplrdataTy = C.getRecordType(UD);
3497   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3498   RD->startDefinition();
3499   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3500   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3501   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3502   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3503   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3504   if (isOpenMPTaskLoopDirective(Kind)) {
3505     QualType KmpUInt64Ty =
3506         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3507     QualType KmpInt64Ty =
3508         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3509     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3510     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3511     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3512     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3513     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3514   }
3515   RD->completeDefinition();
3516   return RD;
3517 }
3518 
3519 static RecordDecl *
3520 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3521                                      ArrayRef<PrivateDataTy> Privates) {
3522   ASTContext &C = CGM.getContext();
3523   // Build struct kmp_task_t_with_privates {
3524   //         kmp_task_t task_data;
3525   //         .kmp_privates_t. privates;
3526   //       };
3527   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3528   RD->startDefinition();
3529   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3530   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3531     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3532   RD->completeDefinition();
3533   return RD;
3534 }
3535 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // The entry takes the global thread id and a restrict-qualified pointer to
  // the kmp_task_t_with_privates record.
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  // Internal linkage: the entry is only referenced through the task record.
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // TDBase addresses the whole kmp_task_t_with_privates record; Base
  // addresses the embedded kmp_task_t (its first field).
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address, not by value.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  // Load the shareds pointer and cast it to the pointer type the outlined
  // task function expects.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates block (second field of the record) is passed as void*, or
  // null when the record has no privates field.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  // For taskloops additionally pass lb, ub, st, liter and the reductions
  // descriptor loaded from the task record.
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The entry always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
3650 
/// Emit a function that destroys the privatized variables of a task:
/// \code
/// kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt) {
///   // destroy each field of tt->privates that requires non-trivial
///   // destruction
/// }
/// \endcode
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Same signature as the task entry: (gtid, task descriptor).
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // The privates record is the field that follows the embedded kmp_task_t.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      // Register a destruction cleanup for this field; the cleanups are
      // emitted when the function is finished.
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
3699 
3700 /// Emit a privates mapping function for correct handling of private and
3701 /// firstprivate variables.
3702 /// \code
3703 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3704 /// **noalias priv1,...,  <tyn> **noalias privn) {
3705 ///   *priv1 = &.privates.priv1;
3706 ///   ...;
3707 ///   *privn = &.privates.privn;
3708 /// }
3709 /// \endcode
3710 static llvm::Value *
3711 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3712                                ArrayRef<const Expr *> PrivateVars,
3713                                ArrayRef<const Expr *> FirstprivateVars,
3714                                ArrayRef<const Expr *> LastprivateVars,
3715                                QualType PrivatesQTy,
3716                                ArrayRef<PrivateDataTy> Privates) {
3717   ASTContext &C = CGM.getContext();
3718   FunctionArgList Args;
3719   ImplicitParamDecl TaskPrivatesArg(
3720       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3721       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3722       ImplicitParamDecl::Other);
3723   Args.push_back(&TaskPrivatesArg);
3724   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
3725   unsigned Counter = 1;
3726   for (const Expr *E : PrivateVars) {
3727     Args.push_back(ImplicitParamDecl::Create(
3728         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3729         C.getPointerType(C.getPointerType(E->getType()))
3730             .withConst()
3731             .withRestrict(),
3732         ImplicitParamDecl::Other));
3733     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3734     PrivateVarsPos[VD] = Counter;
3735     ++Counter;
3736   }
3737   for (const Expr *E : FirstprivateVars) {
3738     Args.push_back(ImplicitParamDecl::Create(
3739         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3740         C.getPointerType(C.getPointerType(E->getType()))
3741             .withConst()
3742             .withRestrict(),
3743         ImplicitParamDecl::Other));
3744     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3745     PrivateVarsPos[VD] = Counter;
3746     ++Counter;
3747   }
3748   for (const Expr *E : LastprivateVars) {
3749     Args.push_back(ImplicitParamDecl::Create(
3750         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3751         C.getPointerType(C.getPointerType(E->getType()))
3752             .withConst()
3753             .withRestrict(),
3754         ImplicitParamDecl::Other));
3755     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3756     PrivateVarsPos[VD] = Counter;
3757     ++Counter;
3758   }
3759   const auto &TaskPrivatesMapFnInfo =
3760       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3761   llvm::FunctionType *TaskPrivatesMapTy =
3762       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3763   std::string Name =
3764       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3765   auto *TaskPrivatesMap = llvm::Function::Create(
3766       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3767       &CGM.getModule());
3768   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3769                                     TaskPrivatesMapFnInfo);
3770   if (CGM.getLangOpts().Optimize) {
3771     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3772     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3773     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3774   }
3775   CodeGenFunction CGF(CGM);
3776   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3777                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3778 
3779   // *privi = &.privates.privi;
3780   LValue Base = CGF.EmitLoadOfPointerLValue(
3781       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3782       TaskPrivatesArg.getType()->castAs<PointerType>());
3783   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3784   Counter = 0;
3785   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3786     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3787     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3788     LValue RefLVal =
3789         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3790     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3791         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3792     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3793     ++Counter;
3794   }
3795   CGF.FinishFunction();
3796   return TaskPrivatesMap;
3797 }
3798 
/// Emit initialization for private variables in task-based directives.
/// \param KmpTaskSharedsPtr Address of the shareds block to copy
/// firstprivate values from (may be invalid).
/// \param TDBase Lvalue of the kmp_task_t_with_privates record being
/// initialized.
/// \param ForDup true when emitting inside the task duplication function
/// (taskloop); in that case only non-trivial constructor initializers are
/// re-run.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates record is the field that follows the embedded kmp_task_t.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    // SrcBase views the shareds block through the captured-record type so
    // fields can be addressed below.
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Walk the fields of the privates record in lock-step with Privates.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the duplication function only non-trivial constructor-based
    // initializers need to run again.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        // Firstprivate: initialize the private copy from the shared value.
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          // Artificial target data variable: read it from the local variable
          // rather than a captured field.
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Duplication: read the value from the source task's shareds block,
          // re-wrapping the lvalue with the declaration's alignment.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
          // Variable captured by a lambda or a block: evaluate the original
          // reference expression directly.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array firstprivate: bind the init helper variable to the
          // shared value, then run the initializer expression.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private: just run the declared initializer, if any.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3915 
3916 /// Check if duplication function is required for taskloops.
3917 static bool checkInitIsRequired(CodeGenFunction &CGF,
3918                                 ArrayRef<PrivateDataTy> Privates) {
3919   bool InitRequired = false;
3920   for (const PrivateDataTy &Pair : Privates) {
3921     const VarDecl *VD = Pair.second.PrivateCopy;
3922     const Expr *Init = VD->getAnyInitializer();
3923     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3924                                     !CGF.isTrivialInitializer(Init));
3925     if (InitRequired)
3926       break;
3927   }
3928   return InitRequired;
3929 }
3930 
3931 
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Arguments: destination task, source task, lastprivate flag.
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  // TDBase addresses the *destination* task record.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    // NOTE: this TDBase intentionally shadows the outer one — it addresses
    // the *source* task, whose shareds block supplies firstprivate values.
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGM.getNaturalTypeAlignment(SharedsTy));
  }
  // Initialize the destination task's privates (ForDup mode).
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
4010 
4011 /// Checks if destructor function is required to be generated.
4012 /// \return true if cleanups are required, false otherwise.
4013 static bool
4014 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
4015   bool NeedsCleanup = false;
4016   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4017   const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
4018   for (const FieldDecl *FD : PrivateRD->fields()) {
4019     NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
4020     if (NeedsCleanup)
4021       break;
4022   }
4023   return NeedsCleanup;
4024 }
4025 
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// RAII helper: the constructor privatizes the iterator variables and emits
/// the *headers* of one counted loop per iterator (counter init, bound check,
/// branch to body); the destructor emits the matching *latches* (counter
/// increment, back-branch, exit block) in reverse order, so any code emitted
/// while the scope is alive ends up inside the innermost loop body.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  // Iterator expression driving the loop nest; null means "no-op scope".
  const OMPIteratorExpr *E = nullptr;
  // Per-iterator continue/exit destinations, filled by the constructor and
  // consumed (in reverse) by the destructor.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    // First pass: evaluate upper bounds and create private storage for each
    // iterator variable and its helper counter.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    // Second pass: emit the loop headers, outermost first.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Pick signed/unsigned compare based on the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  // Close the loops innermost-first, mirroring the headers emitted above.
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
4104 
/// Emits the address of the data described by expression \p E together with
/// its size in bytes.
/// - For an array-shaping expression the size is the element size multiplied
///   by all shape dimensions.
/// - For an array section the size is computed as the distance between the
///   addresses one-past-the-upper-bound and the lower bound.
/// - Otherwise the size is simply sizeof(type of E).
static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    // Array shaping: the base itself is already a pointer value.
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    // Size = sizeof(element) * dim0 * dim1 * ...
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      // Dimensions are converted to size_t before the multiply.
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
    // Array section: size = (&upper + 1) - &lower, computed on integers.
    LValue UpAddrLVal =
        CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
    llvm::Value *UpAddr =
        CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}
4139 
/// Builds the kmp_task_affinity_info_t record type, if it is not built yet.
/// The record layout mirrors the runtime's affinity descriptor:
///   { intptr_t base_addr; size_t len; uint32 flags; }
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}
4154 
/// Emits the task-creation sequence for a task/taskloop/target directive:
/// builds the kmp_task_t_with_privates record for this task, emits the proxy
/// entry function, calls __kmpc_omp_task_alloc (or the target variant),
/// handles detach/affinity clauses, copies shareds, initializes privates and
/// fills the destructor/priority fields of the allocated task descriptor.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  // 'private' copies: no element initializer.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // 'firstprivate' copies carry the element-initializer variable as well.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  // 'lastprivate' copies: no element initializer.
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Sort by decreasing alignment to minimize padding in the privates record.
  // stable_sort keeps the relative order of equally-aligned privates.
  llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
    return L.first > R.first;
  });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloop directives use an
  // extended record with loop bounds, so its type is cached separately.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates-map parameter is the 4th argument of the outlined task
  // function; take its LLVM type from there.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    // No privates: pass a null map.
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    // Request a destructor thunk only if some private needs destruction.
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // 'final' may be a dynamic condition (select at runtime) or a constant.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    // Convert the returned void* to the declared event-handle type.
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // Clauses with an iterator modifier contribute a runtime-computed count;
    // plain clauses contribute a compile-time count.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime-sized array: total = static count + iterator-driven count.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      OpaqueValueExpr OVE(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_RValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Fully static size: emit a constant array temporary.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    // Iterator-driven elements need a runtime position counter, seeded with
    // the number of statically-filled slots.
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      // The scope emits the loop nest; the stores below land in its body.
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  // Reinterpret the runtime's kmp_task_t* as our task-specific record type.
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops also need a duplication helper for lastprivates or privates
    // that require initialization.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4537 
namespace {
/// Dependence kind for RTL.
/// NOTE(review): these values presumably mirror the kmp_depend_info flag
/// encoding in the libomp runtime (kmp.h) — keep in sync if it changes.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Fields ids in kmp_depend_info record.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace
4548 
4549 /// Translates internal dependency kind into the runtime kind.
4550 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4551   RTLDependenceKindTy DepKind;
4552   switch (K) {
4553   case OMPC_DEPEND_in:
4554     DepKind = DepIn;
4555     break;
4556   // Out and InOut dependencies must use the same code.
4557   case OMPC_DEPEND_out:
4558   case OMPC_DEPEND_inout:
4559     DepKind = DepInOut;
4560     break;
4561   case OMPC_DEPEND_mutexinoutset:
4562     DepKind = DepMutexInOutSet;
4563     break;
4564   case OMPC_DEPEND_source:
4565   case OMPC_DEPEND_sink:
4566   case OMPC_DEPEND_depobj:
4567   case OMPC_DEPEND_unknown:
4568     llvm_unreachable("Unknown task dependence type");
4569   }
4570   return DepKind;
4571 }
4572 
/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
/// The record layout mirrors the runtime's dependency descriptor:
///   { intptr_t base_addr; size_t len; <bool-width uint> flags; }
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
                           QualType &FlagsTy) {
  // Flags field is an unsigned integer the width of 'bool' on the target.
  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
  if (KmpDependInfoTy.isNull()) {
    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
    KmpDependInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
    KmpDependInfoRD->completeDefinition();
    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
  }
}
4587 
/// Returns the number of dependencies stored in a depobj and an lvalue for
/// the first kmp_depend_info element. A depobj points at element 0 of its
/// dependency array; the element at index -1 is a header whose base_addr
/// field holds the element count.
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  // Load the void* stored in the depobj variable.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  // Reinterpret it as kmp_depend_info*.
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // Step back one element to reach the header record.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
      Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
4616 
/// Fills consecutive kmp_depend_info elements of \p DependenciesArray from
/// the dependency list \p Data. \p Pos is either a compile-time index
/// (unsigned*, advanced in place) or, when the number of elements is only
/// known at runtime (iterator modifiers), an lvalue holding a runtime
/// counter that is loaded/incremented per element.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // If the clause has an iterator modifier, wrap the stores below in the
  // generated loop nest; otherwise the scope is a no-op.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      // Compile-time position: constant GEP.
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      // Runtime position: load the counter and index with it.
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
                  DependenciesArray.getAlignment()),
          KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the position: in place for the static index, via a
    // load/add/store for the runtime counter.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
4675 
/// Emits code computing, for each expression in a 'depobj' dependency
/// clause, the number of kmp_depend_info records stored in that depobj.
/// Returns one loaded size value (as uintptr) per element of Data.DepExprs.
/// The record count is read from the base_addr field of the record that
/// precedes the depobj's first dependency record — emitDepobjDependClause
/// stores the count there when the depobj is created.
static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  {
    // Scope for the generated iterator variables when the clause uses an
    // OpenMP 5.0 'iterator' modifier (nullptr otherwise).
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      // Load the pointer held by the depobj variable and reinterpret it as
      // kmp_depend_info*.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      // The element count lives one record *before* the pointer stored in
      // the depobj, hence the GEP with index -1.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      // Accumulate the count into a zero-initialized stack temporary so the
      // result can be re-read after the iterator scope has been closed.
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.InitTempAlloca(NumLVal.getAddress(CGF),
                         llvm::ConstantInt::get(CGF.IntPtrTy, 0));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // Reload the accumulated sizes outside of the iterator scope.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}
4733 
/// Copies the kmp_depend_info records of every depobj listed in a 'depobj'
/// dependency clause into DependenciesArray, starting at the runtime index
/// held by PosLVal. PosLVal is advanced by the number of records copied so
/// that successive calls append to the same array.
static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    // Scope for the generated iterator variables when the clause uses an
    // OpenMP 5.0 'iterator' modifier (nullptr otherwise).
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      // Load the pointer held by the depobj variable and reinterpret it as
      // kmp_depend_info*.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj.
      // (Stored in the base_addr field of the record preceding the first
      // dependency record; see emitDepobjDependClause.)
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // memcopy dependency data.
      // Byte count = sizeof(kmp_depend_info) * NumDeps.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr =
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
                  DependenciesArray.getAlignment());
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += numDeps; (element count, not the byte size)
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}
4794 
4795 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4796     CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4797     SourceLocation Loc) {
4798   if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4799         return D.DepExprs.empty();
4800       }))
4801     return std::make_pair(nullptr, Address::invalid());
4802   // Process list of dependencies.
4803   ASTContext &C = CGM.getContext();
4804   Address DependenciesArray = Address::invalid();
4805   llvm::Value *NumOfElements = nullptr;
4806   unsigned NumDependencies = std::accumulate(
4807       Dependencies.begin(), Dependencies.end(), 0,
4808       [](unsigned V, const OMPTaskDataTy::DependData &D) {
4809         return D.DepKind == OMPC_DEPEND_depobj
4810                    ? V
4811                    : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4812       });
4813   QualType FlagsTy;
4814   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4815   bool HasDepobjDeps = false;
4816   bool HasRegularWithIterators = false;
4817   llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4818   llvm::Value *NumOfRegularWithIterators =
4819       llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4820   // Calculate number of depobj dependecies and regular deps with the iterators.
4821   for (const OMPTaskDataTy::DependData &D : Dependencies) {
4822     if (D.DepKind == OMPC_DEPEND_depobj) {
4823       SmallVector<llvm::Value *, 4> Sizes =
4824           emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4825       for (llvm::Value *Size : Sizes) {
4826         NumOfDepobjElements =
4827             CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4828       }
4829       HasDepobjDeps = true;
4830       continue;
4831     }
4832     // Include number of iterations, if any.
4833     if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4834       for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4835         llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4836         Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4837         NumOfRegularWithIterators =
4838             CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
4839       }
4840       HasRegularWithIterators = true;
4841       continue;
4842     }
4843   }
4844 
4845   QualType KmpDependInfoArrayTy;
4846   if (HasDepobjDeps || HasRegularWithIterators) {
4847     NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4848                                            /*isSigned=*/false);
4849     if (HasDepobjDeps) {
4850       NumOfElements =
4851           CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4852     }
4853     if (HasRegularWithIterators) {
4854       NumOfElements =
4855           CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4856     }
4857     OpaqueValueExpr OVE(Loc,
4858                         C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4859                         VK_RValue);
4860     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
4861                                                   RValue::get(NumOfElements));
4862     KmpDependInfoArrayTy =
4863         C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
4864                                /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4865     // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4866     // Properly emit variable-sized array.
4867     auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4868                                          ImplicitParamDecl::Other);
4869     CGF.EmitVarDecl(*PD);
4870     DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4871     NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4872                                               /*isSigned=*/false);
4873   } else {
4874     KmpDependInfoArrayTy = C.getConstantArrayType(
4875         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4876         ArrayType::Normal, /*IndexTypeQuals=*/0);
4877     DependenciesArray =
4878         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4879     DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4880     NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4881                                            /*isSigned=*/false);
4882   }
4883   unsigned Pos = 0;
4884   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4885     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4886         Dependencies[I].IteratorExpr)
4887       continue;
4888     emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4889                    DependenciesArray);
4890   }
4891   // Copy regular dependecies with iterators.
4892   LValue PosLVal = CGF.MakeAddrLValue(
4893       CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4894   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4895   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4896     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4897         !Dependencies[I].IteratorExpr)
4898       continue;
4899     emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4900                    DependenciesArray);
4901   }
4902   // Copy final depobj arrays without iterators.
4903   if (HasDepobjDeps) {
4904     for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4905       if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4906         continue;
4907       emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4908                          DependenciesArray);
4909     }
4910   }
4911   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4912       DependenciesArray, CGF.VoidPtrTy);
4913   return std::make_pair(NumOfElements, DependenciesArray);
4914 }
4915 
/// Allocates and fills the kmp_depend_info array backing an 'omp depobj'
/// construct. The array is heap-allocated via __kmpc_alloc with one extra
/// leading record whose base_addr field holds the element count; the returned
/// address points past that header, at the first dependency record.
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // With an 'iterator' modifier the element count is a runtime value:
    // the product of all iterator space sizes.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    // Allocation size = (count + 1 header record) * sizeof(record).
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Constant element count: size the array (plus header record) statically.
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address(Addr, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // Fill the records, starting at index 1 (index 0 is the header record).
  // With an iterator modifier the position must be a runtime counter.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return the address of the first dependency record, past the header.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
  return DependenciesArray;
}
4998 
4999 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
5000                                         SourceLocation Loc) {
5001   ASTContext &C = CGM.getContext();
5002   QualType FlagsTy;
5003   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5004   LValue Base = CGF.EmitLoadOfPointerLValue(
5005       DepobjLVal.getAddress(CGF),
5006       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5007   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5008   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5009       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
5010   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5011       Addr.getPointer(),
5012       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5013   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
5014                                                                CGF.VoidPtrTy);
5015   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5016   // Use default allocator.
5017   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5018   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5019 
5020   // _kmpc_free(gtid, addr, nullptr);
5021   (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5022                                 CGM.getModule(), OMPRTL___kmpc_free),
5023                             Args);
5024 }
5025 
5026 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
5027                                        OpenMPDependClauseKind NewDepKind,
5028                                        SourceLocation Loc) {
5029   ASTContext &C = CGM.getContext();
5030   QualType FlagsTy;
5031   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5032   RecordDecl *KmpDependInfoRD =
5033       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5034   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5035   llvm::Value *NumDeps;
5036   LValue Base;
5037   std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
5038 
5039   Address Begin = Base.getAddress(CGF);
5040   // Cast from pointer to array type to pointer to single element.
5041   llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
5042   // The basic structure here is a while-do loop.
5043   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
5044   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
5045   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5046   CGF.EmitBlock(BodyBB);
5047   llvm::PHINode *ElementPHI =
5048       CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
5049   ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
5050   Begin = Address(ElementPHI, Begin.getAlignment());
5051   Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
5052                             Base.getTBAAInfo());
5053   // deps[i].flags = NewDepKind;
5054   RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
5055   LValue FlagsLVal = CGF.EmitLValueForField(
5056       Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5057   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5058                         FlagsLVal);
5059 
5060   // Shift the address forward by one element.
5061   Address ElementNext =
5062       CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
5063   ElementPHI->addIncoming(ElementNext.getPointer(),
5064                           CGF.Builder.GetInsertBlock());
5065   llvm::Value *IsEmpty =
5066       CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
5067   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5068   // Done.
5069   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5070 }
5071 
/// Emits the code for an OpenMP 'task' directive: allocates/initializes the
/// task object, materializes the dependence array, and either enqueues the
/// task with the runtime or — when the 'if' clause is false — executes it
/// immediately (undeferred) between begin_if0/complete_if0 calls.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  // Allocate and initialize the kmp_task_t object.
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'then' branch: hand the task to the runtime for (possibly deferred)
  // execution.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied tasks start from part id 0.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  // 'else' branch ('if' clause evaluated to false): wait for the dependences,
  // then run the task entry point inline on the encountering thread.
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
5189 
/// Emits the code for an OpenMP 'taskloop' directive: initializes the task
/// object, fills its lower-bound/upper-bound/stride/reduction fields, and
/// calls __kmpc_taskloop to let the runtime split and schedule the iterations.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // Unlike 'task', the 'if' clause is passed to the runtime as an int flag
  // rather than branching in generated code.
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the task's lower bound field from the loop's LB variable.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Initialize the task's upper bound field from the loop's UB variable.
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Initialize the task's stride field from the loop's stride variable.
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // Values for the 'sched' argument of __kmpc_taskloop.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      // Schedule kind: Data.Schedule's int flag selects num_tasks vs
      // grainsize; no pointer means no schedule clause at all.
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}
5275 
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr, EExpr, UpExpr Optional expressions forwarded verbatim to
/// RedOpGen (used by atomic-update style combiners).
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source/destination element; their back-edge
  // incoming values are added at the bottom of the loop.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Temporarily remap LHSVar/RHSVar to the current elements so the combiner
  // expression operates element-wise.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5355 
5356 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5357 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5358 /// UDR combiner function.
5359 static void emitReductionCombiner(CodeGenFunction &CGF,
5360                                   const Expr *ReductionOp) {
5361   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5362     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5363       if (const auto *DRE =
5364               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5365         if (const auto *DRD =
5366                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5367           std::pair<llvm::Function *, llvm::Function *> Reduction =
5368               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5369           RValue Func = RValue::get(Reduction.first);
5370           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5371           CGF.EmitIgnoredExpr(ReductionOp);
5372           return;
5373         }
5374   CGF.EmitIgnoredExpr(ReductionOp);
5375 }
5376 
/// Emits the function passed to __kmpc_reduce{_nowait} that combines the
/// thread-private reduction items into the LHS items:
///   void reduction_func(void *LHSArg, void *RHSArg);
/// Both arguments point to arrays of void*: one slot per reduction item, plus
/// an extra slot holding the size for each variably-modified item.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Remap each LHS/RHS variable onto the corresponding slot of the argument
  // arrays so the reduction expressions operate on the passed-in items.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type. The size lives in the extra slot
      // immediately following the item itself, so skip it with ++Idx.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5468 
5469 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5470                                                   const Expr *ReductionOp,
5471                                                   const Expr *PrivateRef,
5472                                                   const DeclRefExpr *LHS,
5473                                                   const DeclRefExpr *RHS) {
5474   if (PrivateRef->getType()->isArrayType()) {
5475     // Emit reduction for array section.
5476     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5477     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5478     EmitOMPAggregateReduction(
5479         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5480         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5481           emitReductionCombiner(CGF, ReductionOp);
5482         });
5483   } else {
5484     // Emit reduction for array subscript or single variable.
5485     emitReductionCombiner(CGF, ReductionOp);
5486   }
5487 }
5488 
/// Emits the full reduction sequence for a directive: builds the RedList of
/// item addresses, emits reduce_func, and generates the
/// __kmpc_reduce{_nowait} call plus the switch over its result (tree
/// reduction in case 1, atomic/critical fallback in case 2).
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime calls needed: combine each private copy into the LHS copy
    // directly, in order.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  // Variably-modified items consume an extra slot that carries their size.
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size. The VLA element count is smuggled through the
      // void* slot via an inttoptr cast.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      // Decompose the reduction op into x (target), e (operand) and the
      // update expression so it can be emitted as a simple atomic update.
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      // NOTE: the inner `BO` declared below shadows this one; only the outer
      // one is captured by AtomicRedGen.
      BinaryOperatorKind BO = BO_Comma;
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Fallback path: materialize the loaded value in a temporary
                // remapped to VD so UpExpr reads it instead of the shared var.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5795 
5796 /// Generates unique name for artificial threadprivate variables.
5797 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5798 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5799                                       const Expr *Ref) {
5800   SmallString<256> Buffer;
5801   llvm::raw_svector_ostream Out(Buffer);
5802   const clang::DeclRefExpr *DE;
5803   const VarDecl *D = ::getBaseDecl(Ref, DE);
5804   if (!D)
5805     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5806   D = D->getCanonicalDecl();
5807   std::string Name = CGM.getOpenMPRuntime().getName(
5808       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5809   Out << Prefix << Name << "_"
5810       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5811   return std::string(Out.str());
5812 }
5813 
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
/// \param RCG Reduction codegen helper describing all reduction items.
/// \param N Index of the reduction item this initializer is emitted for.
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // The two void* parameters are marked restrict: %arg (private copy) and
  // %orig (original item) never alias.
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Address of the private copy to be initialized.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue OrigLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // Initializer does not reference the original item: pass a null pointer.
    OrigLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
5882 
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
/// \param N Index of the reduction item this combiner is emitted for.
/// \param ReductionOp The combiner expression to emit.
/// \param LHS/RHS DeclRefExprs for the in/out and in items remapped below.
/// \param PrivateRef Reference to the private copy (decides scalar vs array).
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
5960 
/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
/// \param N Index of the reduction item this finalizer is emitted for.
/// \returns The finalizer function, or nullptr if the item needs no cleanup.
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // No destructor required: skip emitting the function entirely.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Address of the private copy to destroy.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}
6009 
/// Emits initialization of task reductions: builds an array of
/// kmp_taskred_input_t descriptors (one per reduction item, each carrying the
/// shared/orig item addresses, the item size, and the init/fini/comb
/// routines) and passes it to __kmpc_taskred_modifier_init (for reductions
/// with a task modifier) or __kmpc_taskred_init. Returns the value produced
/// by the runtime call, or nullptr if there is nothing to emit.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  // Nothing to do if there is no insertion point or no reduction variables.
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  // Fill one kmp_taskred_input_t element per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    // The finalizer is optional; store a null pointer when no cleanup is
    // needed for the item.
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      // Flag value 1 marks items that use delayed creation (see the comment
      // above about VLAs/array sections).
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
6138 
6139 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6140                                             SourceLocation Loc,
6141                                             bool IsWorksharingReduction) {
6142   // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6143   // is_ws, int num, void *data);
6144   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6145   llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6146                                                 CGM.IntTy, /*isSigned=*/true);
6147   llvm::Value *Args[] = {IdentTLoc, GTid,
6148                          llvm::ConstantInt::get(CGM.IntTy,
6149                                                 IsWorksharingReduction ? 1 : 0,
6150                                                 /*isSigned=*/true)};
6151   (void)CGF.EmitRuntimeCall(
6152       OMPBuilder.getOrCreateRuntimeFunction(
6153           CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6154       Args);
6155 }
6156 
6157 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6158                                               SourceLocation Loc,
6159                                               ReductionCodeGen &RCG,
6160                                               unsigned N) {
6161   auto Sizes = RCG.getSizes(N);
6162   // Emit threadprivate global variable if the type is non-constant
6163   // (Sizes.second = nullptr).
6164   if (Sizes.second) {
6165     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6166                                                      /*isSigned=*/false);
6167     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6168         CGF, CGM.getContext().getSizeType(),
6169         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6170     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6171   }
6172 }
6173 
6174 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6175                                               SourceLocation Loc,
6176                                               llvm::Value *ReductionsPtr,
6177                                               LValue SharedLVal) {
6178   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6179   // *d);
6180   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6181                                                    CGM.IntTy,
6182                                                    /*isSigned=*/true),
6183                          ReductionsPtr,
6184                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6185                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6186   return Address(
6187       CGF.EmitRuntimeCall(
6188           OMPBuilder.getOrCreateRuntimeFunction(
6189               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6190           Args),
6191       SharedLVal.getAlignment());
6192 }
6193 
6194 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6195                                        SourceLocation Loc) {
6196   if (!CGF.HaveInsertPoint())
6197     return;
6198 
6199   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
6200     OMPBuilder.CreateTaskwait(CGF.Builder);
6201   } else {
6202     // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6203     // global_tid);
6204     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6205     // Ignore return result until untied tasks are supported.
6206     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6207                             CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
6208                         Args);
6209   }
6210 
6211   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6212     Region->emitUntiedSwitch(CGF);
6213 }
6214 
6215 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6216                                            OpenMPDirectiveKind InnerKind,
6217                                            const RegionCodeGenTy &CodeGen,
6218                                            bool HasCancel) {
6219   if (!CGF.HaveInsertPoint())
6220     return;
6221   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
6222   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6223 }
6224 
namespace {
/// Values passed as the 'cncl_kind' argument of the __kmpc_cancel and
/// __kmpc_cancellationpoint runtime calls (see getCancellationKind below).
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace
6234 
6235 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6236   RTCancelKind CancelKind = CancelNoreq;
6237   if (CancelRegion == OMPD_parallel)
6238     CancelKind = CancelParallel;
6239   else if (CancelRegion == OMPD_for)
6240     CancelKind = CancelLoop;
6241   else if (CancelRegion == OMPD_sections)
6242     CancelKind = CancelSections;
6243   else {
6244     assert(CancelRegion == OMPD_taskgroup);
6245     CancelKind = CancelTaskgroup;
6246   }
6247   return CancelKind;
6248 }
6249 
/// Emits a 'cancellation point' construct: calls __kmpc_cancellationpoint
/// and, if the runtime reports an active cancellation, branches through any
/// pending cleanups out of the enclosing construct.
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      // A non-zero runtime result means cancellation was requested.
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      // Branch through pending cleanups to the cancel destination of the
      // enclosing region.
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
6286 
/// Emits a 'cancel' construct: calls __kmpc_cancel (guarded by the if clause
/// condition when present) and, if the runtime activates cancellation,
/// branches through pending cleanups out of the enclosing construct.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // Emits the cancel call plus the conditional exit branch; used directly
    // or as the 'then' branch of the if clause.
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      // A non-zero runtime result means cancellation was activated.
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // With an if clause the cancel is only executed when the condition
      // holds; the 'else' branch emits nothing.
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6329 
6330 namespace {
6331 /// Cleanup action for uses_allocators support.
6332 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6333   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6334 
6335 public:
6336   OMPUsesAllocatorsActionTy(
6337       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6338       : Allocators(Allocators) {}
6339   void Enter(CodeGenFunction &CGF) override {
6340     if (!CGF.HaveInsertPoint())
6341       return;
6342     for (const auto &AllocatorData : Allocators) {
6343       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6344           CGF, AllocatorData.first, AllocatorData.second);
6345     }
6346   }
6347   void Exit(CodeGenFunction &CGF) override {
6348     if (!CGF.HaveInsertPoint())
6349       return;
6350     for (const auto &AllocatorData : Allocators) {
6351       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6352                                                         AllocatorData.first);
6353     }
6354   }
6355 };
6356 } // namespace
6357 
6358 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6359     const OMPExecutableDirective &D, StringRef ParentName,
6360     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6361     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6362   assert(!ParentName.empty() && "Invalid target region parent name!");
6363   HasEmittedTargetRegion = true;
6364   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6365   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6366     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6367       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6368       if (!D.AllocatorTraits)
6369         continue;
6370       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6371     }
6372   }
6373   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6374   CodeGen.setAction(UsesAllocatorAction);
6375   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6376                                    IsOffloadEntry, CodeGen);
6377 }
6378 
/// Emits initialization of one allocator from a uses_allocators clause:
/// calls __kmpc_init_allocator with the allocator traits array and stores
/// the returned handle into the (freshly emitted) allocator variable.
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // The number of traits is the constant array bound of the traits expr.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  // Reinterpret the traits array address as void** for the runtime call.
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  // The allocator variable itself is emitted here, before storing the
  // runtime-produced handle into it.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}
6413 
6414 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6415                                              const Expr *Allocator) {
6416   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6417   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6418   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6419   llvm::Value *AllocatorVal =
6420       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6421   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6422                                           CGF.getContext().VoidPtrTy,
6423                                           Allocator->getExprLoc());
6424   (void)CGF.EmitRuntimeCall(
6425       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6426                                             OMPRTL___kmpc_destroy_allocator),
6427       {ThreadId, AllocatorVal});
6428 }
6429 
/// Outlines the target region into a function with a unique
/// location-derived name and, for offload entries, creates the region ID
/// constant and registers the entry with the offload entries info manager.
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Outline the captured statement of the target directive into a function
  // with the name built above.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also mark
  // the outlined function to have external linkage in case we are emitting code
  // for the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
  } else {
    // On the host, the region ID is a uniquely named constant global.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}
6496 
6497 /// Checks if the expression is constant or does not have non-trivial function
6498 /// calls.
6499 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6500   // We can skip constant expressions.
6501   // We can skip expressions with trivial calls or simple expressions.
6502   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6503           !E->hasNonTrivialCall(Ctx)) &&
6504          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6505 }
6506 
/// Descends through (possibly nested) compound statements of \p Body and
/// returns the single meaningful child statement, skipping trivial
/// expressions, ignorable directives/statements, and trivial declarations.
/// Returns nullptr when more than one meaningful child is found at a level.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        // Constant or side-effect-free expressions do not count as children.
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
              // Declarations of these kinds are ignorable here.
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              // Variables are ignorable when constexpr, or when both their
              // type and (optional) initializer are trivial.
              return VD->isConstexpr() ||
                     ((VD->getType().isTrivialType(Ctx) ||
                       VD->getType()->isReferenceType()) &&
                      (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6551 
/// Emit the number of teams for a target directive.  Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit a team of size one for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // A plain 'target': look inside the captured body for a single nested
    // directive to decide the number of teams.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          // Nested teams with an explicit num_teams clause: emit its value.
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        // Nested teams without num_teams: 0 lets the runtime choose.
        return Bld.getInt32(0);
      }
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      return Bld.getInt32(0);
    }
    // No single recognizable nested directive: cannot determine statically.
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target+teams directives: the clause, if any, is on D itself.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // No teams construct involved: exactly one team.
    return Bld.getInt32(1);
  // The remaining directive kinds are not target-based execution directives;
  // reaching any of them here is a bug (see llvm_unreachable below).
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
6684 
/// Compute the number of threads for the region rooted at the captured
/// statement \p CS, honoring a nested 'parallel' directive's if/num_threads
/// clauses and clamping against \p DefaultThreadLimitVal (the enclosing
/// thread_limit value; may be null).
/// Returns i32 1 when the region is known to run single-threaded, i32 0 when
/// the runtime should pick a default, the clamped num_threads value otherwise,
/// or \p DefaultThreadLimitVal (possibly null) when no decision can be made.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  // Look through the captured statement for a single nested executable
  // directive (ignoring wrapping compound statements and captures).
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        // Pick the 'if' clause that applies to 'parallel': either one with no
        // directive-name modifier or one explicitly naming 'parallel'.
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Condition folds to a constant; a false condition forces a
            // single thread, a true one is a no-op.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Emit the clause's pre-init declarations before evaluating the
            // condition that may reference them.
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  // Variable is initialized elsewhere (captured); allocate
                  // storage and register cleanups without an initializer.
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        // Same pre-init handling as for the 'if' clause above.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp against the enclosing thread limit:
        // min(DefaultThreadLimitVal, NumThreads) via unsigned compare.
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        // No num_threads clause: fall back to the thread limit if present,
        // or 0 to let the runtime choose.
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    // A nested simd region executes with a single thread.
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}
6776 
6777 /// Emit the number of threads for a target directive.  Inspect the
6778 /// thread_limit clause associated with a teams construct combined or closely
6779 /// nested with the target directive.
6780 ///
6781 /// Emit the num_threads clause for directives such as 'target parallel' that
6782 /// have no associated teams construct.
6783 ///
6784 /// Otherwise, return nullptr.
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': inspect the nested region for constructs and clauses
    // that determine the thread count.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // Emit the nested directive's thread_limit clause, if any, as an
      // unsigned i32 upper bound.
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        // Emit the clause's pre-init declarations before the thread_limit
        // expression that may reference them.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              // Captured elsewhere; allocate without an initializer.
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // For a teams (non-distribute) directive, descend one more level to
      // reach the construct that actually determines the thread count.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      // A nested simd region executes with a single thread.
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    // Use the thread limit if one was found; 0 lets the runtime decide.
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // Look through a nested plain 'distribute' for the thread count.
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      // Pick the 'if' clause that applies to 'parallel' (unmodified or with
      // an explicit 'parallel' directive-name modifier).
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Statically false condition: run with a single thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Combine with thread_limit: min(num_threads, thread_limit) via
      // unsigned compare, or just num_threads when no limit was given.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    // Apply a dynamically evaluated 'if' condition: <cond> ? <limit> : 1.
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    // Simd regions execute with a single thread.
    return Bld.getInt32(1);
  // The remaining directives are not target-based and must not reach here.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7001 
7002 namespace {
7003 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7004 
7005 // Utility to handle information from clauses associated with a given
7006 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7007 // It provides a convenient interface to obtain the information and generate
7008 // code for that information.
7009 class MappableExprsHandler {
7010 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading. These flag words are passed to the offloading runtime
  /// alongside each mapped entry.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// 0x800 is reserved for compatibility with XLC.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class. This is a mask; getFlagMemberOffset() yields the shift
    /// amount of its lowest set bit.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7054 
7055   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7056   static unsigned getFlagMemberOffset() {
7057     unsigned Offset = 0;
7058     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7059          Remain = Remain >> 1)
7060       Offset++;
7061     return Offset;
7062   }
7063 
7064   /// Class that associates information with a base pointer to be passed to the
7065   /// runtime library.
7066   class BasePointerInfo {
7067     /// The base pointer.
7068     llvm::Value *Ptr = nullptr;
7069     /// The base declaration that refers to this device pointer, or null if
7070     /// there is none.
7071     const ValueDecl *DevPtrDecl = nullptr;
7072 
7073   public:
7074     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7075         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7076     llvm::Value *operator*() const { return Ptr; }
7077     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7078     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7079   };
7080 
  /// Base pointers for each generated map entry.
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  /// Generic value arrays (section pointers or sizes) per map entry.
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  /// Map-type flag words per map entry.
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  /// User-defined mapper declarations associated with each map entry.
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
7085 
7086   /// This structure contains combined information generated for mappable
7087   /// clauses, including base pointers, pointers, sizes, map types, and
7088   /// user-defined mappers.
7089   struct MapCombinedInfoTy {
7090     MapBaseValuesArrayTy BasePointers;
7091     MapValuesArrayTy Pointers;
7092     MapValuesArrayTy Sizes;
7093     MapFlagsArrayTy Types;
7094     MapMappersArrayTy Mappers;
7095 
7096     /// Append arrays in \a CurInfo.
7097     void append(MapCombinedInfoTy &CurInfo) {
7098       BasePointers.append(CurInfo.BasePointers.begin(),
7099                           CurInfo.BasePointers.end());
7100       Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
7101       Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
7102       Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
7103       Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7104     }
7105   };
7106 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  /// All addresses default to Address::invalid() until populated.
  struct StructRangeInfoTy {
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
  };
7118 
private:
  /// Information gathered for one mappable-expression component list:
  /// the components themselves, the map type/modifiers that apply to them,
  /// and how the resulting entry should be treated (implicit, device
  /// pointer return, user-defined mapper, device address).
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;
    const ValueDecl *Mapper = nullptr;
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers, bool ReturnDevicePointer,
        bool IsImplicit, const ValueDecl *Mapper = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), ForDeviceAddr(ForDeviceAddr) {}
  };
7141 
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    // Expression identifying the deferred entry.
    const Expr *IE = nullptr;
    // Declaration the use_device_ptr/use_device_addr clause named.
    const ValueDecl *VD = nullptr;
    // True for use_device_addr, false for use_device_ptr.
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
7154 
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;
7175 
  /// Compute the size in bytes of the entity referenced by \p E, emitting any
  /// IR needed for dynamic dimensions. Array shaping expressions and array
  /// sections are handled specially; otherwise the static size of the
  /// expression's (canonical, dereferenced) type is used.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      // Start from the pointee size and multiply in each shaped dimension,
      // converted to size_t.
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and lower bound is
      // not specified too, that means we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      // Element size of the section's base (pointer pointee or array element).
      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      // Explicit length: size = length * elemsize, with length converted to
      // size_t.
      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      // Guard against lb beyond the base size: clamp the result to zero.
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }
7250 
7251   /// Return the corresponding bits for a given map clause modifier. Add
7252   /// a flag marking the map as a pointer if requested. Add a flag marking the
7253   /// map as the first one of a series of maps that relate to the same map
7254   /// expression.
7255   OpenMPOffloadMappingFlags getMapTypeBits(
7256       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7257       bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
7258     OpenMPOffloadMappingFlags Bits =
7259         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7260     switch (MapType) {
7261     case OMPC_MAP_alloc:
7262     case OMPC_MAP_release:
7263       // alloc and release is the default behavior in the runtime library,  i.e.
7264       // if we don't pass any bits alloc/release that is what the runtime is
7265       // going to do. Therefore, we don't need to signal anything for these two
7266       // type modifiers.
7267       break;
7268     case OMPC_MAP_to:
7269       Bits |= OMP_MAP_TO;
7270       break;
7271     case OMPC_MAP_from:
7272       Bits |= OMP_MAP_FROM;
7273       break;
7274     case OMPC_MAP_tofrom:
7275       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7276       break;
7277     case OMPC_MAP_delete:
7278       Bits |= OMP_MAP_DELETE;
7279       break;
7280     case OMPC_MAP_unknown:
7281       llvm_unreachable("Unexpected map type!");
7282     }
7283     if (AddPtrFlag)
7284       Bits |= OMP_MAP_PTR_AND_OBJ;
7285     if (AddIsTargetParamFlag)
7286       Bits |= OMP_MAP_TARGET_PARAM;
7287     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7288         != MapModifiers.end())
7289       Bits |= OMP_MAP_ALWAYS;
7290     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7291         != MapModifiers.end())
7292       Bits |= OMP_MAP_CLOSE;
7293     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present)
7294         != MapModifiers.end())
7295       Bits |= OMP_MAP_PRESENT;
7296     return Bits;
7297   }
7298 
7299   /// Return true if the provided expression is a final array section. A
7300   /// final array section, is one whose length can't be proved to be one.
7301   bool isFinalArraySectionExpression(const Expr *E) const {
7302     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7303 
7304     // It is not an array section and therefore not a unity-size one.
7305     if (!OASE)
7306       return false;
7307 
7308     // An array section with no colon always refer to a single element.
7309     if (OASE->getColonLocFirst().isInvalid())
7310       return false;
7311 
7312     const Expr *Length = OASE->getLength();
7313 
7314     // If we don't have a length we have to check if the array has size 1
7315     // for this dimension. Also, we should always expect a length if the
7316     // base type is pointer.
7317     if (!Length) {
7318       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7319                              OASE->getBase()->IgnoreParenImpCasts())
7320                              .getCanonicalType();
7321       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7322         return ATy->getSize().getSExtValue() != 1;
7323       // If we don't have a constant dimension length, we have to consider
7324       // the current section as having any size, so it is not necessarily
7325       // unitary. If it happen to be unity size, that's user fault.
7326       return true;
7327     }
7328 
7329     // Check if the length evaluates to 1.
7330     Expr::EvalResult Result;
7331     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7332       return true; // Can have more that size 1.
7333 
7334     llvm::APSInt ConstLength = Result.Val.getInt();
7335     return ConstLength.getSExtValue() != 1;
7336   }
7337 
  /// Generate the base pointers, section pointers, sizes, map type bits, and
  /// user-defined mappers (all included in \a CombinedInfo) for the provided
  /// map type, map modifier, and expression components.
  /// \a IsFirstComponentList should be set to true if the provided set of
  /// components is the first associated with a capture.
  ///
  /// \param MapType Kind of the map clause (to/from/tofrom/alloc/...).
  /// \param MapModifiers Map-type modifiers (always/close/present/...).
  /// \param Components Component list, stored from the final expression down
  ///        to the base; it is scanned here in reverse (base first).
  /// \param CombinedInfo Output arrays; one entry is appended per map
  ///        generated (base pointer, pointer, size, flags, mapper).
  /// \param PartialStruct Output range info, filled in when members of a
  ///        struct are mapped so a combined entry can be emitted later.
  /// \param IsImplicit True if this mapping was not written explicitly by the
  ///        user (affects the generated flag bits).
  /// \param Mapper Optional user-defined mapper; inherited by the last
  ///        generated component entry.
  /// \param ForDeviceAddr True when generating for a device-address clause.
  /// \param OverlappedElements Component lists of other maps overlapping this
  ///        one; when non-empty, the base element is emitted as bitcopies of
  ///        its non-overlapped pieces only.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
      bool IsFirstComponentList, bool IsImplicit,
      const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = llvm::None) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    // for data directives
    // p, p, sizeof(float*), TARGET_PARAM | TO | FROM
    // p, &p[1], 24*sizeof(float), PTR_AND_OBJ | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (**), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    // When the variable is on a declare target link or in a to clause with
    // unified memory, a reference is needed to hold the host/device address
    // of the variable.
    bool RequiresReference = false;

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
    const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);

    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
    } else if (OAShE &&
               isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
      // Array shaping of 'this': use the evaluated base pointer directly.
      BP = Address(
          CGF.EmitScalarExpr(OAShE->getBase()),
          CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
            RequiresReference = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
          }
        }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());

        // For non-data directives, we do not need to generate individual map
        // information for the pointer, it can be associated with the combined
        // storage.
        if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
            !CurDir.is<const OMPExecutableDirective *>() ||
            !isOpenMPTargetDataManagementDirective(
                CurDir.get<const OMPExecutableDirective *>()
                    ->getDirectiveKind()))
          ++I;
      }
    }

    // Track whether a component of the list should be marked as MEMBER_OF some
    // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
    // in a component list should be marked as MEMBER_OF, all subsequent entries
    // do not belong to the base struct. E.g.
    // struct S2 s;
    // s.ps->ps->ps->f[:]
    //   (1) (2) (3) (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
    // is the pointee of ps(2) which is not member of struct s, so it should not
    // be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a component
    // in the component list which is a member expression. Useful when we have a
    // pointer or a final array section, in which case it is the previous
    // component in the list which tells us whether we have a member expression.
    // E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells us
    // whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    for (; I != CE; ++I) {
      // If the current component is member of a struct (parent struct) mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME)
          ShouldBeMemberOf = true;
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section, is one whose length can't be proved to be one.
      bool IsFinalArraySection =
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      const auto *OAShE =
          dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
      const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
      const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
      bool IsPointer =
          OAShE ||
          (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();
      // A pointer component expressed through a unary/binary operator is a
      // dereference, so it does not itself open a new entry here.
      bool IsNonDerefPointer = IsPointer && !UO && !BO;

      if (Next == CE || IsNonDerefPointer || IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
                isa<UnaryOperator>(Next->getAssociatedExpression()) ||
                isa<BinaryOperator>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        // LB is the lower bound (section pointer) of the entry being emitted.
        Address LB = Address::invalid();
        if (OAShE) {
          LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
                       CGF.getContext().getTypeAlignInChars(
                           OAShE->getBase()->getType()));
        } else {
          LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
                   .getAddress(CGF);
        }

        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the object
        // it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointerOrAddr =
            (IsPointer || ForDeviceAddr) && EncounteredME &&
            (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
             EncounteredME);
        if (!OverlappedElements.empty()) {
          // Handle base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(Next == CE &&
                 "Expected last element for the overlapped elements.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on the
          // device.
          PartialStruct.LowestElem = {0, LB};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
                                                              CGF.VoidPtrTy),
              TypeSize.getQuantity() - 1);
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          // Emit data for non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapModifiers, IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false);
          // Start the first non-overlapped chunk at the base of the element.
          LB = BP;
          llvm::Value *Size = nullptr;
          // Do bitcopy of all non-overlapped structure elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (MC.getAssociatedDeclaration()) {
                ComponentLB =
                    CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                        .getAddress(CGF);
                Size = CGF.Builder.CreatePtrDiff(
                    CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
                    CGF.EmitCastToVoidPtr(LB.getPointer()));
                break;
              }
            }
            CombinedInfo.BasePointers.push_back(BP.getPointer());
            CombinedInfo.Pointers.push_back(LB.getPointer());
            CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
                Size, CGF.Int64Ty, /*isSigned=*/true));
            CombinedInfo.Types.push_back(Flags);
            CombinedInfo.Mappers.push_back(nullptr);
            // The next chunk starts right after the overlapped element.
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
          }
          // Emit the trailing chunk from the last overlapped element to the
          // end of the base element.
          CombinedInfo.BasePointers.push_back(BP.getPointer());
          CombinedInfo.Pointers.push_back(LB.getPointer());
          Size = CGF.Builder.CreatePtrDiff(
              CGF.EmitCastToVoidPtr(
                  CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
              CGF.EmitCastToVoidPtr(LB.getPointer()));
          CombinedInfo.Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          CombinedInfo.Types.push_back(Flags);
          CombinedInfo.Mappers.push_back(nullptr);
          break;
        }
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        if (!IsMemberPointerOrAddr) {
          CombinedInfo.BasePointers.push_back(BP.getPointer());
          CombinedInfo.Pointers.push_back(LB.getPointer());
          CombinedInfo.Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));

          // If Mapper is valid, the last component inherits the mapper.
          bool HasMapper = Mapper && Next == CE;
          CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags = getMapTypeBits(
              MapType, MapModifiers, IsImplicit,
              !IsExpressionFirstInfo || RequiresReference,
              IsCaptureFirstInfo && !RequiresReference);

          if (!IsExpressionFirstInfo) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
            // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
            if (IsPointer)
              Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
                         OMP_MAP_DELETE | OMP_MAP_CLOSE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
              // should be later updated with the correct value of MEMBER_OF.
              Flags |= OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          CombinedInfo.Types.push_back(Flags);
        }

        // If we have encountered a member expression so far, keep track of the
        // mapped member. If the parent is "*this", then the value declaration
        // is nullptr.
        if (EncounteredME) {
          const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this struct
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LB};
            if (IsFinalArraySection) {
              Address HB =
                  CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
                      .getAddress(CGF);
              PartialStruct.HighestElem = {FieldIndex, HB};
            } else {
              PartialStruct.HighestElem = {FieldIndex, LB};
            }
            PartialStruct.Base = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LB};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            PartialStruct.HighestElem = {FieldIndex, LB};
          }
        }

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
      }
    }
  }
7817 
7818   /// Return the adjusted map modifiers if the declaration a capture refers to
7819   /// appears in a first-private clause. This is expected to be used only with
7820   /// directives that start with 'target'.
7821   MappableExprsHandler::OpenMPOffloadMappingFlags
7822   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7823     assert(Cap.capturesVariable() && "Expected capture by reference only!");
7824 
7825     // A first private variable captured by reference will use only the
7826     // 'private ptr' and 'map to' flag. Return the right flags if the captured
7827     // declaration is known as first-private in this handler.
7828     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7829       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
7830           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
7831         return MappableExprsHandler::OMP_MAP_ALWAYS |
7832                MappableExprsHandler::OMP_MAP_TO;
7833       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7834         return MappableExprsHandler::OMP_MAP_TO |
7835                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
7836       return MappableExprsHandler::OMP_MAP_PRIVATE |
7837              MappableExprsHandler::OMP_MAP_TO;
7838     }
7839     return MappableExprsHandler::OMP_MAP_TO |
7840            MappableExprsHandler::OMP_MAP_FROM;
7841   }
7842 
7843   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
7844     // Rotate by getFlagMemberOffset() bits.
7845     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
7846                                                   << getFlagMemberOffset());
7847   }
7848 
7849   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
7850                                      OpenMPOffloadMappingFlags MemberOfFlag) {
7851     // If the entry is PTR_AND_OBJ but has not been marked with the special
7852     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
7853     // marked as MEMBER_OF.
7854     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
7855         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
7856       return;
7857 
7858     // Reset the placeholder value to prepare the flag for the assignment of the
7859     // proper MEMBER_OF value.
7860     Flags &= ~OMP_MAP_MEMBER_OF;
7861     Flags |= MemberOfFlag;
7862   }
7863 
7864   void getPlainLayout(const CXXRecordDecl *RD,
7865                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7866                       bool AsBase) const {
7867     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7868 
7869     llvm::StructType *St =
7870         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7871 
7872     unsigned NumElements = St->getNumElements();
7873     llvm::SmallVector<
7874         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7875         RecordLayout(NumElements);
7876 
7877     // Fill bases.
7878     for (const auto &I : RD->bases()) {
7879       if (I.isVirtual())
7880         continue;
7881       const auto *Base = I.getType()->getAsCXXRecordDecl();
7882       // Ignore empty bases.
7883       if (Base->isEmpty() || CGF.getContext()
7884                                  .getASTRecordLayout(Base)
7885                                  .getNonVirtualSize()
7886                                  .isZero())
7887         continue;
7888 
7889       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7890       RecordLayout[FieldIndex] = Base;
7891     }
7892     // Fill in virtual bases.
7893     for (const auto &I : RD->vbases()) {
7894       const auto *Base = I.getType()->getAsCXXRecordDecl();
7895       // Ignore empty bases.
7896       if (Base->isEmpty())
7897         continue;
7898       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7899       if (RecordLayout[FieldIndex])
7900         continue;
7901       RecordLayout[FieldIndex] = Base;
7902     }
7903     // Fill in all the fields.
7904     assert(!RD->isUnion() && "Unexpected union.");
7905     for (const auto *Field : RD->fields()) {
7906       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
7907       // will fill in later.)
7908       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
7909         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
7910         RecordLayout[FieldIndex] = Field;
7911       }
7912     }
7913     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
7914              &Data : RecordLayout) {
7915       if (Data.isNull())
7916         continue;
7917       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
7918         getPlainLayout(Base, Layout, /*AsBase=*/true);
7919       else
7920         Layout.push_back(Data.get<const FieldDecl *>());
7921     }
7922   }
7923 
7924 public:
  /// Constructor for an executable directive. Pre-computes the firstprivate
  /// declarations and is_device_ptr component lists referenced while
  /// generating map information for \p Dir.
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {
    // Extract firstprivate clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlists())
        FirstPrivateDecls.try_emplace(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
    // Extract implicit firstprivates from uses_allocators clauses.
    for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
      for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
        OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
        // The allocator traits variable, if present, is implicitly
        // firstprivate; otherwise the allocator variable itself is.
        if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
          FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
                                        /*Implicit=*/true);
        else if (const auto *VD = dyn_cast<VarDecl>(
                     cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
                         ->getDecl()))
          FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
      }
    }
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
  }
7950 
  /// Constructor for the declare mapper directive. No clause pre-processing
  /// is performed in this case.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
7954 
  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  ///
  /// \param CombinedInfo The combined entry is appended to these arrays.
  /// \param CurTypes Flags of the member entries already generated for this
  ///        struct; rewritten in place with the proper MEMBER_OF value.
  /// \param PartialStruct Base address and lowest/highest mapped element
  ///        collected while the member entries were generated.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct) const {
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element
    llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
    CombinedInfo.Pointers.push_back(LB);
    // There should not be a mapper for a combined entry.
    CombinedInfo.Mappers.push_back(nullptr);
    // Size is (addr of {highest+1} element) - (addr of lowest element)
    llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
    llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
    // The difference is non-negative by construction (highest >= lowest), so
    // an unsigned extension to 64 bits is used.
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                  /*isSigned=*/false);
    CombinedInfo.Sizes.push_back(Size);
    // Map type is always TARGET_PARAM
    CombinedInfo.Types.push_back(OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_PRESENT;
        }))
      CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }
7997 
  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  /// \param SkipVarSet Canonical declarations whose component lists must be
  /// ignored entirely.
  void generateAllInfo(
      MapCombinedInfoTy &CombinedInfo,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map. Lists are keyed by
    // the canonical declaration of their base (nullptr for 'this').
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen =
        [&Info, &SkipVarSet](
            const ValueDecl *D,
            OMPClauseMappableExprCommon::MappableExprComponentListRef L,
            OpenMPMapClauseKind MapType,
            ArrayRef<OpenMPMapModifierKind> MapModifiers,
            bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
            bool ForDeviceAddr = false) {
          const ValueDecl *VD =
              D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
          // Declarations in SkipVarSet are handled elsewhere by the caller.
          if (SkipVarSet.count(VD))
            return;
          Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
                                IsImplicit, Mapper, ForDeviceAddr);
        };

    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    // Fill the information map: 'map' clauses keep their own map type, while
    // the 'to'/'from' motion clauses are recorded as 'to'/'from' map types
    // with no modifiers.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), std::get<1>(L), C->getMapType(),
                C->getMapTypeModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L));
      }
    for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), std::get<1>(L), OMPC_MAP_to, llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L));
      }
    for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), std::get<1>(L), OMPC_MAP_from, llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L));
      }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;

    for (const auto *C :
         CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto CI = std::find_if(
              It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
                return MI.Components.back().getAssociatedDeclaration() == VD;
              });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          if (CI != It->second.end()) {
            CI->ReturnDevicePointer = true;
            continue;
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, Components, OMPC_MAP_unknown, llvm::None,
                  /*ReturnDevicePointer=*/false, C->isImplicit(), nullptr);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
        } else {
          // Zero-sized RETURN_PARAM entry: only asks the runtime for the
          // device pointer of data assumed to be mapped already.
          llvm::Value *Ptr =
              CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
          CombinedInfo.BasePointers.emplace_back(Ptr, VD);
          CombinedInfo.Pointers.push_back(Ptr);
          CombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM |
                                       OMP_MAP_TARGET_PARAM);
          CombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    // Look at the use_device_addr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_addr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *C :
         CurExecDir->getClausesOfKind<OMPUseDeviceAddrClause>()) {
      for (const auto L : C->component_lists()) {
        assert(!std::get<1>(L).empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
        // Handle each declaration at most once, even if it appears in several
        // use_device_addr clauses.
        if (!Processed.insert(VD).second)
          continue;
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
            return MI.Components.back().getAssociatedDeclaration() == VD;
          });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          if (CI != It->second.end()) {
            CI->ReturnDevicePointer = true;
            continue;
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
                  /*ReturnDevicePointer=*/false, C->isImplicit(), nullptr,
                  /*ForDeviceAddr=*/true);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
        } else {
          // Unlike use_device_ptr, take the address of the expression (it may
          // not be a pointer) rather than loading a pointer value from it.
          llvm::Value *Ptr;
          if (IE->isGLValue())
            Ptr = CGF.EmitLValue(IE).getPointer(CGF);
          else
            Ptr = CGF.EmitScalarExpr(IE);
          CombinedInfo.BasePointers.emplace_back(Ptr, VD);
          CombinedInfo.Pointers.push_back(Ptr);
          CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
          CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
          CombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    // Emit the map information for each declaration separately, then append
    // the result to CombinedInfo.
    for (const auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = true;

      // Temporary generated information.
      MapCombinedInfoTy CurInfo;
      StructRangeInfoTy PartialStruct;

      for (const MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");

        // Remember the current base pointer index.
        unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
        generateInfoForComponentList(
            L.MapType, L.MapModifiers, L.Components, CurInfo, PartialStruct,
            IsFirstComponentList, L.IsImplicit, L.Mapper, L.ForDeviceAddr);

        // If this entry relates with a device pointer, set the relevant
        // declaration and add the 'return pointer' flag.
        if (L.ReturnDevicePointer) {
          assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
                 "Unexpected number of mapped base pointers.");

          const ValueDecl *RelevantVD =
              L.Components.back().getAssociatedDeclaration();
          assert(RelevantVD &&
                 "No relevant declaration related with device pointer??");

          CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
              RelevantVD);
          CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
        }
        IsFirstComponentList = false;
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr or use_device_addr.
      auto CI = DeferredInfo.find(M.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr;
          llvm::Value *Ptr;
          if (L.ForDeviceAddr) {
            if (L.IE->isGLValue())
              Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            else
              Ptr = this->CGF.EmitScalarExpr(L.IE);
            BasePtr = Ptr;
            // Entry is RETURN_PARAM. Also, set the placeholder value
            // MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
          } else {
            BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                             L.IE->getExprLoc());
            // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
            // value MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                                    OMP_MAP_MEMBER_OF);
          }
          CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
          CurInfo.Pointers.push_back(Ptr);
          CurInfo.Sizes.push_back(
              llvm::Constant::getNullValue(this->CGF.Int64Ty));
          CurInfo.Mappers.push_back(nullptr);
        }
      }

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct);

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
  }
8264 
8265   /// Generate all the base pointers, section pointers, sizes, map types, and
8266   /// mappers for the extracted map clauses of user-defined mapper (all included
8267   /// in \a CombinedInfo).
8268   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
8269     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8270            "Expect a declare mapper directive");
8271     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8272     // We have to process the component lists that relate with the same
8273     // declaration in a single chunk so that we can generate the map flags
8274     // correctly. Therefore, we organize all lists in a map.
8275     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
8276 
8277     // Fill the information map for map clauses.
8278     for (const auto *C : CurMapperDir->clauselists()) {
8279       const auto *MC = cast<OMPMapClause>(C);
8280       for (const auto L : MC->component_lists()) {
8281         const ValueDecl *VD =
8282             std::get<0>(L) ? cast<ValueDecl>(std::get<0>(L)->getCanonicalDecl())
8283                            : nullptr;
8284         // Get the corresponding user-defined mapper.
8285         Info[VD].emplace_back(
8286             std::get<1>(L), MC->getMapType(), MC->getMapTypeModifiers(),
8287             /*ReturnDevicePointer=*/false, MC->isImplicit(), std::get<2>(L));
8288       }
8289     }
8290 
8291     for (const auto &M : Info) {
8292       // We need to know when we generate information for the first component
8293       // associated with a capture, because the mapping flags depend on it.
8294       bool IsFirstComponentList = true;
8295 
8296       // Temporary generated information.
8297       MapCombinedInfoTy CurInfo;
8298       StructRangeInfoTy PartialStruct;
8299 
8300       for (const MapInfo &L : M.second) {
8301         assert(!L.Components.empty() &&
8302                "Not expecting declaration with no component lists.");
8303         generateInfoForComponentList(
8304             L.MapType, L.MapModifiers, L.Components, CurInfo, PartialStruct,
8305             IsFirstComponentList, L.IsImplicit, L.Mapper, L.ForDeviceAddr);
8306         IsFirstComponentList = false;
8307       }
8308 
8309       // If there is an entry in PartialStruct it means we have a struct with
8310       // individual members mapped. Emit an extra combined entry.
8311       if (PartialStruct.Base.isValid())
8312         emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct);
8313 
8314       // We need to append the results of this capture to what we already have.
8315       CombinedInfo.append(CurInfo);
8316     }
8317   }
8318 
  /// Emit capture info for lambdas for variables captured by reference.
  ///
  /// If \p VD is a lambda closure object (or a reference to one), emit one
  /// PTR_AND_OBJ|LITERAL|MEMBER_OF|IMPLICIT map entry for the captured 'this'
  /// (if any) and one for each variable captured by reference or captured
  /// pointer. For every emitted entry, \p LambdaPointers maps the capture
  /// field address to the lambda object address so the MEMBER_OF index can be
  /// fixed up later (see adjustMemberOfForLambdaCaptures).
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    // Only lambda closure types are handled here.
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      // Map the captured 'this' pointer: the base is the capture field inside
      // the lambda object, the size is one pointer.
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      // No user-defined mapper applies to lambda captures.
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      // NOTE: this VD intentionally refers to the captured variable and
      // shadows the parameter \p VD (the lambda object) below.
      const VarDecl *VD = LC.getCapturedVar();
      // Only by-reference captures and captured pointers need an entry.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        // By-reference capture: map the referenced object with its full size.
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Captured pointer: map the loaded pointer value with zero size.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }
8382 
8383   /// Set correct indices for lambdas captures.
8384   void adjustMemberOfForLambdaCaptures(
8385       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8386       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8387       MapFlagsArrayTy &Types) const {
8388     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8389       // Set correct member_of idx for all implicit lambda captures.
8390       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8391                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8392         continue;
8393       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8394       assert(BasePtr && "Unable to find base lambda address.");
8395       int TgtIdx = -1;
8396       for (unsigned J = I; J > 0; --J) {
8397         unsigned Idx = J - 1;
8398         if (Pointers[Idx] != BasePtr)
8399           continue;
8400         TgtIdx = Idx;
8401         break;
8402       }
8403       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8404       // All other current entries will be MEMBER_OF the combined entry
8405       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8406       // 0xFFFF in the MEMBER_OF field).
8407       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8408       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8409     }
8410   }
8411 
  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  /// \param Cap The capture of the target region ('this' or a variable).
  /// \param Arg The corresponding captured value.
  /// \param PartialStruct Filled in when individual members of a struct are
  /// mapped; the caller emits the combined entry.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // A 'this' capture is keyed by a null declaration; otherwise use the
    // canonical declaration of the captured variable.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      CombinedInfo.BasePointers.emplace_back(Arg, VD);
      CombinedInfo.Pointers.push_back(Arg);
      // Passed by value as a pointer-sized literal.
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    // Collect every component list (with its map type, modifiers, implicit
    // flag, and optional user-defined mapper) that refers to this capture's
    // declaration.
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper);
      }
    }

    // Find overlapping elements (including the offset from the base element).
    // For each pair of component lists, compare them component-by-component
    // from the base outwards; if one list is a prefix of the other, the longer
    // one maps a sub-object of the shorter one.
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper) = L;
      ++Count;
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        // NOTE: this reuses (and clobbers) MapType/MapModifiers/IsImplicit/
        // Mapper from L above. That is benign: only Components1 is needed from
        // L1, and none of the clobbered values are read again before the next
        // outer iteration re-assigns them.
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head of
        // the components list.
        if (CI == CE || SI == SE) {
          assert((CI != CE || SI != SE) &&
                 "Unexpected full match of the mapping components.");
          // The shorter (exhausted) list is the base; the other one maps a
          // sub-object of it.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    // The field order of the record is needed as the tie-breaker when the
    // overlapping lists diverge on fields of different parents.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      if (const auto *CRD =
              VD->getType().getCanonicalType()->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            // Skip the common prefix of the two component lists.
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // Otherwise order by field position: same parent compares field
            // indices, different parents compare positions in Layout.
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Emit the map information; the mapping flags depend on whether this is
    // the first component list generated for the capture.
    // Go through all of the elements with the overlapped elements.
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper) = L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      bool IsFirstComponentList = true;
      generateInfoForComponentList(
          MapType, MapModifiers, Components, CombinedInfo, PartialStruct,
          IsFirstComponentList, IsImplicit, Mapper, /*ForDeviceAddr=*/false,
          OverlappedComponents);
    }
    // Go through other elements without overlapped elements.
    bool IsFirstComponentList = OverlappedData.empty();
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper) = L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, Components,
                                     CombinedInfo, PartialStruct,
                                     IsFirstComponentList, IsImplicit, Mapper);
      IsFirstComponentList = false;
    }
  }
8592 
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  ///
  /// Appends exactly one entry (base pointer, pointer, size, map type and a
  /// null mapper) to \a CombinedInfo describing how the capture is passed to
  /// the device, and marks that entry as a target parameter.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // Capture of 'this': map the pointed-to object with size of the
      // pointee record type.
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      // Capture by reference: map the referenced storage.
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end() &&
          VD->getType().isConstant(CGF.getContext())) {
        // A constant firstprivate variable is materialized once in a global
        // copy and mapped from there instead of from the stack slot.
        llvm::Constant *Addr =
            CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
        // Copy the value of the original variable to the new global copy.
        CGF.Builder.CreateMemCpy(
            CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
            Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
            CombinedInfo.Sizes.back(), /*IsVolatile=*/false);
        // Use new global variable as the base pointers.
        CombinedInfo.BasePointers.push_back(Addr);
        CombinedInfo.Pointers.push_back(Addr);
      } else {
        CombinedInfo.BasePointers.push_back(CV);
        if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
          // Firstprivate pointer: pass the pointee address rather than the
          // address of the reference itself.
          Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
              CV, ElementType, CGF.getContext().getDeclAlign(VD),
              AlignmentSource::Decl));
          CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
        } else {
          CombinedInfo.Pointers.push_back(CV);
        }
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
8676 };
8677 } // anonymous namespace
8678 
8679 /// Emit the arrays used to pass the captures and map information to the
8680 /// offloading runtime library. If there is no map or capture information,
8681 /// return nullptr by reference.
static void
emitOffloadingArrays(CodeGenFunction &CGF,
                     MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
                     CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : CombinedInfo.Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    // Base pointers, pointers and mappers always live in stack temporaries
    // and are filled element-by-element in the loop below.
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      // Runtime-evaluated sizes: a stack temporary filled in the loop below.
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (llvm::Value *S : CombinedInfo.Sizes)
        ConstSizes.push_back(cast<llvm::Constant>(S));

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    llvm::Constant *MapTypesArrayInit =
        llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
        CGM.getModule(), MapTypesArrayInit->getType(),
        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
        MapTypesArrayInit, MaptypesName);
    MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
    Info.MapTypesArray = MapTypesArrayGbl;

    // Fill the runtime-filled arrays: for each entry store the base pointer,
    // pointer, (if needed) size, and mapper function into slot I.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Record where a use_device_ptr declaration's address was stored so
      // the body of the region can reference it.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }
}
8810 
8811 /// Emit the arguments to be passed to the runtime library based on the
8812 /// arrays of base pointers, pointers, sizes, map types, and mappers.
8813 static void emitOffloadingArraysArgument(
8814     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
8815     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
8816     llvm::Value *&MapTypesArrayArg, llvm::Value *&MappersArrayArg,
8817     CGOpenMPRuntime::TargetDataInfo &Info) {
8818   CodeGenModule &CGM = CGF.CGM;
8819   if (Info.NumberOfPtrs) {
8820     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8821         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8822         Info.BasePointersArray,
8823         /*Idx0=*/0, /*Idx1=*/0);
8824     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8825         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8826         Info.PointersArray,
8827         /*Idx0=*/0,
8828         /*Idx1=*/0);
8829     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8830         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
8831         /*Idx0=*/0, /*Idx1=*/0);
8832     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8833         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8834         Info.MapTypesArray,
8835         /*Idx0=*/0,
8836         /*Idx1=*/0);
8837     MappersArrayArg =
8838         Info.HasMapper
8839             ? CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy)
8840             : llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8841   } else {
8842     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8843     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8844     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8845     MapTypesArrayArg =
8846         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8847     MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8848   }
8849 }
8850 
8851 /// Check for inner distribute directive.
8852 static const OMPExecutableDirective *
8853 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
8854   const auto *CS = D.getInnermostCapturedStmt();
8855   const auto *Body =
8856       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
8857   const Stmt *ChildStmt =
8858       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8859 
8860   if (const auto *NestedDir =
8861           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8862     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
8863     switch (D.getDirectiveKind()) {
8864     case OMPD_target:
8865       if (isOpenMPDistributeDirective(DKind))
8866         return NestedDir;
8867       if (DKind == OMPD_teams) {
8868         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
8869             /*IgnoreCaptured=*/true);
8870         if (!Body)
8871           return nullptr;
8872         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8873         if (const auto *NND =
8874                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8875           DKind = NND->getDirectiveKind();
8876           if (isOpenMPDistributeDirective(DKind))
8877             return NND;
8878         }
8879       }
8880       return nullptr;
8881     case OMPD_target_teams:
8882       if (isOpenMPDistributeDirective(DKind))
8883         return NestedDir;
8884       return nullptr;
8885     case OMPD_target_parallel:
8886     case OMPD_target_simd:
8887     case OMPD_target_parallel_for:
8888     case OMPD_target_parallel_for_simd:
8889       return nullptr;
8890     case OMPD_target_teams_distribute:
8891     case OMPD_target_teams_distribute_simd:
8892     case OMPD_target_teams_distribute_parallel_for:
8893     case OMPD_target_teams_distribute_parallel_for_simd:
8894     case OMPD_parallel:
8895     case OMPD_for:
8896     case OMPD_parallel_for:
8897     case OMPD_parallel_master:
8898     case OMPD_parallel_sections:
8899     case OMPD_for_simd:
8900     case OMPD_parallel_for_simd:
8901     case OMPD_cancel:
8902     case OMPD_cancellation_point:
8903     case OMPD_ordered:
8904     case OMPD_threadprivate:
8905     case OMPD_allocate:
8906     case OMPD_task:
8907     case OMPD_simd:
8908     case OMPD_sections:
8909     case OMPD_section:
8910     case OMPD_single:
8911     case OMPD_master:
8912     case OMPD_critical:
8913     case OMPD_taskyield:
8914     case OMPD_barrier:
8915     case OMPD_taskwait:
8916     case OMPD_taskgroup:
8917     case OMPD_atomic:
8918     case OMPD_flush:
8919     case OMPD_depobj:
8920     case OMPD_scan:
8921     case OMPD_teams:
8922     case OMPD_target_data:
8923     case OMPD_target_exit_data:
8924     case OMPD_target_enter_data:
8925     case OMPD_distribute:
8926     case OMPD_distribute_simd:
8927     case OMPD_distribute_parallel_for:
8928     case OMPD_distribute_parallel_for_simd:
8929     case OMPD_teams_distribute:
8930     case OMPD_teams_distribute_simd:
8931     case OMPD_teams_distribute_parallel_for:
8932     case OMPD_teams_distribute_parallel_for_simd:
8933     case OMPD_target_update:
8934     case OMPD_declare_simd:
8935     case OMPD_declare_variant:
8936     case OMPD_begin_declare_variant:
8937     case OMPD_end_declare_variant:
8938     case OMPD_declare_target:
8939     case OMPD_end_declare_target:
8940     case OMPD_declare_reduction:
8941     case OMPD_declare_mapper:
8942     case OMPD_taskloop:
8943     case OMPD_taskloop_simd:
8944     case OMPD_master_taskloop:
8945     case OMPD_master_taskloop_simd:
8946     case OMPD_parallel_master_taskloop:
8947     case OMPD_parallel_master_taskloop_simd:
8948     case OMPD_requires:
8949     case OMPD_unknown:
8950     default:
8951       llvm_unreachable("Unexpected directive.");
8952     }
8953   }
8954 
8955   return nullptr;
8956 }
8957 
8958 /// Emit the user-defined mapper function. The code generation follows the
8959 /// pattern in the example below.
8960 /// \code
8961 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
8962 ///                                           void *base, void *begin,
8963 ///                                           int64_t size, int64_t type) {
8964 ///   // Allocate space for an array section first.
8965 ///   if (size > 1 && !maptype.IsDelete)
8966 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
8967 ///                                 size*sizeof(Ty), clearToFrom(type));
8968 ///   // Map members.
8969 ///   for (unsigned i = 0; i < size; i++) {
8970 ///     // For each component specified by this mapper:
8971 ///     for (auto c : all_components) {
8972 ///       if (c.hasMapper())
8973 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
8974 ///                       c.arg_type);
8975 ///       else
8976 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
8977 ///                                     c.arg_begin, c.arg_size, c.arg_type);
8978 ///     }
8979 ///   }
8980 ///   // Delete the array section.
8981 ///   if (size > 1 && maptype.IsDelete)
8982 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
8983 ///                                 size*sizeof(Ty), clearToFrom(type));
8984 /// }
8985 /// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Emit each mapper function only once.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes. The signature is
  // (void *rt_mapper_handle, void *base, void *begin, int64_t size,
  //  int64_t type) as shown in the \code example above.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // Name the function ".omp_mapper.<mangled type>.<mapper id>".
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
      CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  llvm::BasicBlock *LastBB = BodyBB;
  // PHI over the current element pointer; the back-edge incoming value is
  // added after the body is emitted (see PtrNext below).
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
    return MapperCGF
        .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
        .getAddress(MapperCGF);
  });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift into the MEMBER_OF bit-field position so it can be added to the
  // map type of member components below.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];

    // Extract the MEMBER_OF field from the map type.
    llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
    MapperCGF.EmitBlock(MemberBB);
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *Member = MapperCGF.Builder.CreateAnd(
        OriMapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
    llvm::BasicBlock *MemberCombineBB =
        MapperCGF.createBasicBlock("omp.member.combine");
    llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
    llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
    MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
    // Add the number of pre-existing components to the MEMBER_OF field if it
    // is valid.
    MapperCGF.EmitBlock(MemberCombineBB);
    llvm::Value *CombinedMember =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
    // Do nothing if it is not a member of previous components.
    MapperCGF.EmitBlock(TypeBB);
    llvm::PHINode *MemberMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
    MemberMapType->addIncoming(OriMapType, MemberBB);
    MemberMapType->addIncoming(CombinedMember, MemberCombineBB);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    // Join the four decay outcomes; the ToElseBB edge corresponds to the
    // unmodified 'tofrom' case.
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Cache the function and, when emitted inside another function, record it
  // for that function so the mapping can be dropped when codegen finishes.
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
9246 
/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
  // Suffix for the generated basic-block names so that the init and delete
  // variants are distinguishable in the emitted IR.
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section: only sections with at least one
  // element (Size >= 1, signed compare) get init/delete handling; otherwise
  // fall through directly to \p ExitBB.
  llvm::BasicBlock *IsDeleteBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"}));
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);

  // Evaluate if we are going to delete this section. The body runs when the
  // OMP_MAP_DELETE bit matches the requested mode: clear for init, set for
  // delete.
  MapperCGF.EmitBlock(IsDeleteBB);
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  if (IsInit) {
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}
9302 
9303 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
9304     const OMPDeclareMapperDecl *D) {
9305   auto I = UDMMap.find(D);
9306   if (I != UDMMap.end())
9307     return I->second;
9308   emitUserDefinedMapper(D);
9309   return UDMMap.lookup(D);
9310 }
9311 
9312 void CGOpenMPRuntime::emitTargetNumIterationsCall(
9313     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9314     llvm::Value *DeviceID,
9315     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9316                                      const OMPLoopDirective &D)>
9317         SizeEmitter) {
9318   OpenMPDirectiveKind Kind = D.getDirectiveKind();
9319   const OMPExecutableDirective *TD = &D;
9320   // Get nested teams distribute kind directive, if any.
9321   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
9322     TD = getNestedDistributeDirective(CGM.getContext(), D);
9323   if (!TD)
9324     return;
9325   const auto *LD = cast<OMPLoopDirective>(TD);
9326   auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF,
9327                                                      PrePostActionTy &) {
9328     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
9329       llvm::Value *Args[] = {DeviceID, NumIterations};
9330       CGF.EmitRuntimeCall(
9331           OMPBuilder.getOrCreateRuntimeFunction(
9332               CGM.getModule(), OMPRTL___kmpc_push_target_tripcount),
9333           Args);
9334     }
9335   };
9336   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
9337 }
9338 
/// Emit the full offloading sequence for a target directive: capture the
/// region's variables, fill the offloading argument arrays, invoke the
/// appropriate __tgt_target* runtime entry point, and fall back to the host
/// version of the outlined region when offloading fails, is conditionally
/// disabled (if clause), or is unavailable (no function ID / ancestor device).
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // A depend clause forces the target region to be wrapped in an outer task;
  // the fallback paths then have to re-capture the variables in the task's
  // context.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // InputInfo and MapTypesArray are filled by TargetThenGen below and read by
  // ThenGen (possibly later, from inside a task), so they outlive both
  // lambdas.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      if (RequiresOuterTask) {
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No device clause: let the runtime pick the default device.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       InputInfo.MappersArray.getPointer(),
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait
                                   ? OMPRTL___tgt_target_teams_nowait_mapper
                                   : OMPRTL___tgt_target_teams_mapper),
          OffloadingArgs);
    } else {
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       InputInfo.MappersArray.getPointer()};
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
                                         : OMPRTL___tgt_target_mapper),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    // A non-zero return from the __tgt_target* call means the offload failed
    // and the region must be executed on the host instead.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      // Re-capture in the task context before calling the host fallback.
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Builds the map information for all captures plus any non-captured map
  // clause items, materializes the offloading arrays, and then dispatches to
  // ThenGen (directly, or via an outer task when there is a depend clause).
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

    // Walk the captures in lockstep with the captured record fields and the
    // already-generated captured values.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapCombinedInfoTy CurInfo;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurInfo.BasePointers.push_back(*CV);
        CurInfo.Pointers.push_back(*CV);
        CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                                MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                                MappableExprsHandler::OMP_MAP_IMPLICIT);
        CurInfo.Mappers.push_back(nullptr);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
        if (!CI->capturesThis())
          MappedVarSet.insert(CI->getCapturedVar());
        else
          MappedVarSet.insert(nullptr);
        if (CurInfo.BasePointers.empty())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                  CurInfo, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert(!CurInfo.BasePointers.empty() &&
             "Non-existing map pointer for capture!");
      assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
             CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
             CurInfo.BasePointers.size() == CurInfo.Types.size() &&
             CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct);

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(
        LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
        CombinedInfo.Types);
    // Map any list items in a map clause that were not captures because they
    // weren't referenced within the construct.
    MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info.MappersArray, Info);
    // Publish the array addresses through the captured InputInfo/MapTypesArray
    // so ThenGen can consume them.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
9630 
/// Recursively scan \p S for OpenMP target execution directives and emit the
/// corresponding device function for each one found, using \p ParentName in
/// the target-region entry name.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    // The (DeviceID, FileID, ParentName, Line) tuple uniquely identifies the
    // target region entry point.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the device-codegen entry point matching the combined
    // directive kind.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // All remaining directive kinds are not target execution directives, so
    // reaching them here (after the isOpenMPTargetExecutionDirective check
    // above) indicates an internal inconsistency.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // A non-target executable directive: recurse into its associated statement
  // (if any); its clauses cannot contain target regions.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(
        E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
9779 
9780 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9781   // If emitting code for the host, we do not process FD here. Instead we do
9782   // the normal code generation.
9783   if (!CGM.getLangOpts().OpenMPIsDevice) {
9784     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
9785       Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9786           OMPDeclareTargetDeclAttr::getDeviceType(FD);
9787       // Do not emit device_type(nohost) functions for the host.
9788       if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9789         return true;
9790     }
9791     return false;
9792   }
9793 
9794   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9795   // Try to detect target regions in the function.
9796   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
9797     StringRef Name = CGM.getMangledName(GD);
9798     scanForTargetRegionsFunctions(FD->getBody(), Name);
9799     Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9800         OMPDeclareTargetDeclAttr::getDeviceType(FD);
9801     // Do not emit device_type(nohost) functions for the host.
9802     if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9803       return true;
9804   }
9805 
9806   // Do not to emit function if it is not marked as declare target.
9807   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9808          AlreadyEmittedTargetDecls.count(VD) == 0;
9809 }
9810 
9811 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9812   if (!CGM.getLangOpts().OpenMPIsDevice)
9813     return false;
9814 
9815   // Check if there are Ctors/Dtors in this declaration and look for target
9816   // regions in it. We use the complete variant to produce the kernel name
9817   // mangling.
9818   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9819   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9820     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9821       StringRef ParentName =
9822           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9823       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9824     }
9825     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9826       StringRef ParentName =
9827           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9828       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9829     }
9830   }
9831 
9832   // Do not to emit variable if it is not marked as declare target.
9833   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9834       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9835           cast<VarDecl>(GD.getDecl()));
9836   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9837       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9838        HasRequiresUnifiedSharedMemory)) {
9839     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9840     return true;
9841   }
9842   return false;
9843 }
9844 
9845 llvm::Constant *
9846 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
9847                                                 const VarDecl *VD) {
9848   assert(VD->getType().isConstant(CGM.getContext()) &&
9849          "Expected constant variable.");
9850   StringRef VarName;
9851   llvm::Constant *Addr;
9852   llvm::GlobalValue::LinkageTypes Linkage;
9853   QualType Ty = VD->getType();
9854   SmallString<128> Buffer;
9855   {
9856     unsigned DeviceID;
9857     unsigned FileID;
9858     unsigned Line;
9859     getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
9860                              FileID, Line);
9861     llvm::raw_svector_ostream OS(Buffer);
9862     OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
9863        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
9864     VarName = OS.str();
9865   }
9866   Linkage = llvm::GlobalValue::InternalLinkage;
9867   Addr =
9868       getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
9869                                   getDefaultFirstprivateAddressSpace());
9870   cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
9871   CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
9872   CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
9873   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9874       VarName, Addr, VarSize,
9875       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
9876   return Addr;
9877 }
9878 
/// Registers \p VD (with address \p Addr) for device offloading. Declare
/// target variables become offload entries; other variables emitted during
/// device compilation are recorded as non-target device globals.
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  // Nothing to register unless this is a device compilation or there are
  // device triples to offload to.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return;
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables.
  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  CharUnits VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
      !HasRequiresUnifiedSharedMemory) {
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
      assert(!VarSize.isZero() && "Expected non-zero size of the variable");
    } else {
      // Declaration only: size is unknown in this translation unit.
      VarSize = CharUnits::Zero();
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temp solution to prevent optimizations of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      // Create an internal constant "<name>_ref" pointing at the variable and
      // mark it compiler-used so the variable itself cannot be dropped.
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        llvm::Constant *AddrRef =
            getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    // 'link' clause, or 'to' combined with unified shared memory: register the
    // pointer-sized link/reference entry instead of the variable itself.
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must link or to with unified memory.");
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
    else
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;

    if (CGM.getLangOpts().OpenMPIsDevice) {
      // On the device only the name is registered; the host provides storage.
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    VarSize = CGM.getPointerSize();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}
9949 
9950 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
9951   if (isa<FunctionDecl>(GD.getDecl()) ||
9952       isa<OMPDeclareReductionDecl>(GD.getDecl()))
9953     return emitTargetFunctions(GD);
9954 
9955   return emitTargetGlobalVariable(GD);
9956 }
9957 
9958 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
9959   for (const VarDecl *VD : DeferredGlobalVariables) {
9960     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9961         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9962     if (!Res)
9963       continue;
9964     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9965         !HasRequiresUnifiedSharedMemory) {
9966       CGM.EmitGlobal(VD);
9967     } else {
9968       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
9969               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9970                HasRequiresUnifiedSharedMemory)) &&
9971              "Expected link clause or to clause with unified memory.");
9972       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
9973     }
9974   }
9975 }
9976 
9977 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
9978     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
9979   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
9980          " Expected target-based directive.");
9981 }
9982 
9983 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
9984   for (const OMPClause *Clause : D->clauselists()) {
9985     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
9986       HasRequiresUnifiedSharedMemory = true;
9987     } else if (const auto *AC =
9988                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
9989       switch (AC->getAtomicDefaultMemOrderKind()) {
9990       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
9991         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
9992         break;
9993       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
9994         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
9995         break;
9996       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
9997         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
9998         break;
9999       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10000         break;
10001       }
10002     }
10003   }
10004 }
10005 
/// Returns the default atomic ordering established by an
/// 'atomic_default_mem_order' clause of a 'requires' directive (see
/// processRequiresDirective), or the member's initial value if none was seen.
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
10009 
10010 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10011                                                        LangAS &AS) {
10012   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10013     return false;
10014   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10015   switch(A->getAllocatorType()) {
10016   case OMPAllocateDeclAttr::OMPNullMemAlloc:
10017   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10018   // Not supported, fallback to the default mem space.
10019   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10020   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10021   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10022   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10023   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10024   case OMPAllocateDeclAttr::OMPConstMemAlloc:
10025   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10026     AS = LangAS::Default;
10027     return true;
10028   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10029     llvm_unreachable("Expected predefined allocator for the variables with the "
10030                      "static storage.");
10031   }
10032   return false;
10033 }
10034 
/// Returns true if a 'requires' directive with a 'unified_shared_memory'
/// clause has been processed (see processRequiresDirective).
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
10038 
10039 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10040     CodeGenModule &CGM)
10041     : CGM(CGM) {
10042   if (CGM.getLangOpts().OpenMPIsDevice) {
10043     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10044     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10045   }
10046 }
10047 
10048 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10049   if (CGM.getLangOpts().OpenMPIsDevice)
10050     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10051 }
10052 
/// Tracks emission of \p GD during device compilation.
/// NOTE(review): judging from the logic below, a 'true' return appears to
/// mean "already handled / do not emit again" and 'false' "still needs
/// emission" — confirm against callers.
bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  // Host compilations and regions with marking disabled (see
  // DisableAutoDeclareTargetRAII) never need this bookkeeping.
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target as it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      // If a module-level global already exists under this mangled name,
      // report "emitted" only when it is a full definition.
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  // First insertion returns false (needs emission); any later call for the
  // same decl returns true.
  return !AlreadyEmittedTargetDecls.insert(D).second;
}
10072 
/// Creates the host-side constructor-like function that registers the
/// 'requires' clauses with the runtime via __tgt_register_requires.
/// \returns the created registration function, or nullptr when nothing has
/// to be registered (no offloading, simd-only, device compilation, or no
/// target regions/entries in this translation unit).
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    // Scope the CodeGenFunction so its cleanup runs before we return.
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}
10114 
10115 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10116                                     const OMPExecutableDirective &D,
10117                                     SourceLocation Loc,
10118                                     llvm::Function *OutlinedFn,
10119                                     ArrayRef<llvm::Value *> CapturedVars) {
10120   if (!CGF.HaveInsertPoint())
10121     return;
10122 
10123   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10124   CodeGenFunction::RunCleanupsScope Scope(CGF);
10125 
10126   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10127   llvm::Value *Args[] = {
10128       RTLoc,
10129       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10130       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10131   llvm::SmallVector<llvm::Value *, 16> RealArgs;
10132   RealArgs.append(std::begin(Args), std::end(Args));
10133   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10134 
10135   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10136       CGM.getModule(), OMPRTL___kmpc_fork_teams);
10137   CGF.EmitRuntimeCall(RTLFn, RealArgs);
10138 }
10139 
10140 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10141                                          const Expr *NumTeams,
10142                                          const Expr *ThreadLimit,
10143                                          SourceLocation Loc) {
10144   if (!CGF.HaveInsertPoint())
10145     return;
10146 
10147   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10148 
10149   llvm::Value *NumTeamsVal =
10150       NumTeams
10151           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10152                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10153           : CGF.Builder.getInt32(0);
10154 
10155   llvm::Value *ThreadLimitVal =
10156       ThreadLimit
10157           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10158                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10159           : CGF.Builder.getInt32(0);
10160 
10161   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
10162   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10163                                      ThreadLimitVal};
10164   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10165                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10166                       PushNumTeamsArgs);
10167 }
10168 
/// Emits the pair of runtime calls that open and close an OpenMP 'target
/// data' environment (__tgt_target_data_begin_mapper /
/// __tgt_target_data_end_mapper) around the region produced by \p CodeGen,
/// honoring the 'if' (\p IfCond) and 'device' (\p Device) clauses. The
/// offloading arrays built for the opening call are stored in \p Info and
/// reused by the closing call.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MappersArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No 'device' clause: pass the sentinel so the runtime uses the default
      // device.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,      PointerNum,       BasePointersArrayArg, PointersArrayArg,
        SizesArrayArg, MapTypesArrayArg, MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    // Reuse the arrays recorded in Info by the opening call.
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MappersArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,      PointerNum,       BasePointersArrayArg, PointersArrayArg,
        SizesArrayArg, MapTypesArrayArg, MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
10300 
/// Emits the runtime call for a standalone target data directive ('target
/// enter data', 'target exit data', or 'target update'), honoring 'if',
/// 'device', 'nowait' and 'depend' clauses.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo,
                    &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No 'device' clause: pass the sentinel for the default device.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    llvm::Value *OffloadingArgs[] = {DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    // All remaining directive kinds are listed explicitly (instead of a bare
    // default) so adding a new directive enum forces a decision here.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info.MappersArray, Info);
    // Publish the arrays through InputInfo/MapTypesArray, which ThenGen reads
    // by reference.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (D.hasClausesOfKind<OMPDependClause>())
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    // When the 'if' clause is false, nothing is emitted.
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
10465 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    /// Parameter kind; defaults to a regular vector parameter.
    ParamKindTy Kind = Vector;
    /// Linear step, or (per the name) possibly a constant argument value —
    /// exact semantics depend on the code that fills it in; see usage below.
    llvm::APSInt StrideOrArg;
    /// Requested alignment; a zero value means "no alignment specified"
    /// (checked via operator! at the use sites).
    llvm::APSInt Alignment;
  };
} // namespace
10476 
10477 static unsigned evaluateCDTSize(const FunctionDecl *FD,
10478                                 ArrayRef<ParamAttrTy> ParamAttrs) {
10479   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10480   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10481   // of that clause. The VLEN value must be power of 2.
10482   // In other case the notion of the function`s "characteristic data type" (CDT)
10483   // is used to compute the vector length.
10484   // CDT is defined in the following order:
10485   //   a) For non-void function, the CDT is the return type.
10486   //   b) If the function has any non-uniform, non-linear parameters, then the
10487   //   CDT is the type of the first such parameter.
10488   //   c) If the CDT determined by a) or b) above is struct, union, or class
10489   //   type which is pass-by-value (except for the type that maps to the
10490   //   built-in complex data type), the characteristic data type is int.
10491   //   d) If none of the above three cases is applicable, the CDT is int.
10492   // The VLEN is then determined based on the CDT and the size of vector
10493   // register of that ISA for which current vector version is generated. The
10494   // VLEN is computed using the formula below:
10495   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
10496   // where vector register size specified in section 3.2.1 Registers and the
10497   // Stack Frame of original AMD64 ABI document.
10498   QualType RetType = FD->getReturnType();
10499   if (RetType.isNull())
10500     return 0;
10501   ASTContext &C = FD->getASTContext();
10502   QualType CDT;
10503   if (!RetType.isNull() && !RetType->isVoidType()) {
10504     CDT = RetType;
10505   } else {
10506     unsigned Offset = 0;
10507     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10508       if (ParamAttrs[Offset].Kind == Vector)
10509         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10510       ++Offset;
10511     }
10512     if (CDT.isNull()) {
10513       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10514         if (ParamAttrs[I + Offset].Kind == Vector) {
10515           CDT = FD->getParamDecl(I)->getType();
10516           break;
10517         }
10518       }
10519     }
10520   }
10521   if (CDT.isNull())
10522     CDT = C.IntTy;
10523   CDT = CDT->getCanonicalTypeUnqualified();
10524   if (CDT->isRecordType() || CDT->isUnionType())
10525     CDT = C.IntTy;
10526   return C.getTypeSize(CDT);
10527 }
10528 
10529 static void
10530 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10531                            const llvm::APSInt &VLENVal,
10532                            ArrayRef<ParamAttrTy> ParamAttrs,
10533                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
10534   struct ISADataTy {
10535     char ISA;
10536     unsigned VecRegSize;
10537   };
10538   ISADataTy ISAData[] = {
10539       {
10540           'b', 128
10541       }, // SSE
10542       {
10543           'c', 256
10544       }, // AVX
10545       {
10546           'd', 256
10547       }, // AVX2
10548       {
10549           'e', 512
10550       }, // AVX512
10551   };
10552   llvm::SmallVector<char, 2> Masked;
10553   switch (State) {
10554   case OMPDeclareSimdDeclAttr::BS_Undefined:
10555     Masked.push_back('N');
10556     Masked.push_back('M');
10557     break;
10558   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10559     Masked.push_back('N');
10560     break;
10561   case OMPDeclareSimdDeclAttr::BS_Inbranch:
10562     Masked.push_back('M');
10563     break;
10564   }
10565   for (char Mask : Masked) {
10566     for (const ISADataTy &Data : ISAData) {
10567       SmallString<256> Buffer;
10568       llvm::raw_svector_ostream Out(Buffer);
10569       Out << "_ZGV" << Data.ISA << Mask;
10570       if (!VLENVal) {
10571         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10572         assert(NumElts && "Non-zero simdlen/cdtsize expected");
10573         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10574       } else {
10575         Out << VLENVal;
10576       }
10577       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
10578         switch (ParamAttr.Kind){
10579         case LinearWithVarStride:
10580           Out << 's' << ParamAttr.StrideOrArg;
10581           break;
10582         case Linear:
10583           Out << 'l';
10584           if (ParamAttr.StrideOrArg != 1)
10585             Out << ParamAttr.StrideOrArg;
10586           break;
10587         case Uniform:
10588           Out << 'u';
10589           break;
10590         case Vector:
10591           Out << 'v';
10592           break;
10593         }
10594         if (!!ParamAttr.Alignment)
10595           Out << 'a' << ParamAttr.Alignment;
10596       }
10597       Out << '_' << Fn->getName();
10598       Fn->addFnAttr(Out.str());
10599     }
10600   }
10601 }
10602 
// These are the functions needed to mangle the names of the vector
// functions generated by the compiler, according to the rules defined
// in the "Vector Function ABI specifications for AArch64", available
// at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10608 
10609 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
10610 ///
10611 /// TODO: Need to implement the behavior for reference marked with a
10612 /// var or no linear modifiers (1.b in the section). For this, we
10613 /// need to extend ParamKindTy to support the linear modifiers.
10614 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10615   QT = QT.getCanonicalType();
10616 
10617   if (QT->isVoidType())
10618     return false;
10619 
10620   if (Kind == ParamKindTy::Uniform)
10621     return false;
10622 
10623   if (Kind == ParamKindTy::Linear)
10624     return false;
10625 
10626   // TODO: Handle linear references with modifiers
10627 
10628   if (Kind == ParamKindTy::LinearWithVarStride)
10629     return false;
10630 
10631   return true;
10632 }
10633 
10634 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10635 static bool getAArch64PBV(QualType QT, ASTContext &C) {
10636   QT = QT.getCanonicalType();
10637   unsigned Size = C.getTypeSize(QT);
10638 
10639   // Only scalars and complex within 16 bytes wide set PVB to true.
10640   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10641     return false;
10642 
10643   if (QT->isFloatingType())
10644     return true;
10645 
10646   if (QT->isIntegerType())
10647     return true;
10648 
10649   if (QT->isPointerType())
10650     return true;
10651 
10652   // TODO: Add support for complex types (section 3.1.2, item 2).
10653 
10654   return false;
10655 }
10656 
10657 /// Computes the lane size (LS) of a return type or of an input parameter,
10658 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10659 /// TODO: Add support for references, section 3.2.1, item 1.
10660 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10661   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10662     QualType PTy = QT.getCanonicalType()->getPointeeType();
10663     if (getAArch64PBV(PTy, C))
10664       return C.getTypeSize(PTy);
10665   }
10666   if (getAArch64PBV(QT, C))
10667     return C.getTypeSize(QT);
10668 
10669   return C.getTypeSize(C.getUIntPtrType());
10670 }
10671 
10672 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10673 // signature of the scalar function, as defined in 3.2.2 of the
10674 // AAVFABI.
10675 static std::tuple<unsigned, unsigned, bool>
10676 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10677   QualType RetType = FD->getReturnType().getCanonicalType();
10678 
10679   ASTContext &C = FD->getASTContext();
10680 
10681   bool OutputBecomesInput = false;
10682 
10683   llvm::SmallVector<unsigned, 8> Sizes;
10684   if (!RetType->isVoidType()) {
10685     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10686     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10687       OutputBecomesInput = true;
10688   }
10689   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10690     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10691     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10692   }
10693 
10694   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10695   // The LS of a function parameter / return value can only be a power
10696   // of 2, starting from 8 bits, up to 128.
10697   assert(std::all_of(Sizes.begin(), Sizes.end(),
10698                      [](unsigned Size) {
10699                        return Size == 8 || Size == 16 || Size == 32 ||
10700                               Size == 64 || Size == 128;
10701                      }) &&
10702          "Invalid size");
10703 
10704   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10705                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
10706                          OutputBecomesInput);
10707 }
10708 
10709 /// Mangle the parameter part of the vector function name according to
10710 /// their OpenMP classification. The mangling function is defined in
10711 /// section 3.5 of the AAVFABI.
10712 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10713   SmallString<256> Buffer;
10714   llvm::raw_svector_ostream Out(Buffer);
10715   for (const auto &ParamAttr : ParamAttrs) {
10716     switch (ParamAttr.Kind) {
10717     case LinearWithVarStride:
10718       Out << "ls" << ParamAttr.StrideOrArg;
10719       break;
10720     case Linear:
10721       Out << 'l';
10722       // Don't print the step value if it is not present or if it is
10723       // equal to 1.
10724       if (ParamAttr.StrideOrArg != 1)
10725         Out << ParamAttr.StrideOrArg;
10726       break;
10727     case Uniform:
10728       Out << 'u';
10729       break;
10730     case Vector:
10731       Out << 'v';
10732       break;
10733     }
10734 
10735     if (!!ParamAttr.Alignment)
10736       Out << 'a' << ParamAttr.Alignment;
10737   }
10738 
10739   return std::string(Out.str());
10740 }
10741 
10742 // Function used to add the attribute. The parameter `VLEN` is
10743 // templated to allow the use of "x" when targeting scalable functions
10744 // for SVE.
10745 template <typename T>
10746 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10747                                  char ISA, StringRef ParSeq,
10748                                  StringRef MangledName, bool OutputBecomesInput,
10749                                  llvm::Function *Fn) {
10750   SmallString<256> Buffer;
10751   llvm::raw_svector_ostream Out(Buffer);
10752   Out << Prefix << ISA << LMask << VLEN;
10753   if (OutputBecomesInput)
10754     Out << "v";
10755   Out << ParSeq << "_" << MangledName;
10756   Fn->addFnAttr(Out.str());
10757 }
10758 
10759 // Helper function to generate the Advanced SIMD names depending on
10760 // the value of the NDS when simdlen is not present.
10761 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10762                                       StringRef Prefix, char ISA,
10763                                       StringRef ParSeq, StringRef MangledName,
10764                                       bool OutputBecomesInput,
10765                                       llvm::Function *Fn) {
10766   switch (NDS) {
10767   case 8:
10768     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10769                          OutputBecomesInput, Fn);
10770     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10771                          OutputBecomesInput, Fn);
10772     break;
10773   case 16:
10774     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10775                          OutputBecomesInput, Fn);
10776     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10777                          OutputBecomesInput, Fn);
10778     break;
10779   case 32:
10780     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10781                          OutputBecomesInput, Fn);
10782     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10783                          OutputBecomesInput, Fn);
10784     break;
10785   case 64:
10786   case 128:
10787     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10788                          OutputBecomesInput, Fn);
10789     break;
10790   default:
10791     llvm_unreachable("Scalar type is too wide.");
10792   }
10793 }
10794 
10795 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
10796 static void emitAArch64DeclareSimdFunction(
10797     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
10798     ArrayRef<ParamAttrTy> ParamAttrs,
10799     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
10800     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
10801 
10802   // Get basic data for building the vector signature.
10803   const auto Data = getNDSWDS(FD, ParamAttrs);
10804   const unsigned NDS = std::get<0>(Data);
10805   const unsigned WDS = std::get<1>(Data);
10806   const bool OutputBecomesInput = std::get<2>(Data);
10807 
10808   // Check the values provided via `simdlen` by the user.
10809   // 1. A `simdlen(1)` doesn't produce vector signatures,
10810   if (UserVLEN == 1) {
10811     unsigned DiagID = CGM.getDiags().getCustomDiagID(
10812         DiagnosticsEngine::Warning,
10813         "The clause simdlen(1) has no effect when targeting aarch64.");
10814     CGM.getDiags().Report(SLoc, DiagID);
10815     return;
10816   }
10817 
10818   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
10819   // Advanced SIMD output.
10820   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
10821     unsigned DiagID = CGM.getDiags().getCustomDiagID(
10822         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
10823                                     "power of 2 when targeting Advanced SIMD.");
10824     CGM.getDiags().Report(SLoc, DiagID);
10825     return;
10826   }
10827 
10828   // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
10829   // limits.
10830   if (ISA == 's' && UserVLEN != 0) {
10831     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
10832       unsigned DiagID = CGM.getDiags().getCustomDiagID(
10833           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
10834                                       "lanes in the architectural constraints "
10835                                       "for SVE (min is 128-bit, max is "
10836                                       "2048-bit, by steps of 128-bit)");
10837       CGM.getDiags().Report(SLoc, DiagID) << WDS;
10838       return;
10839     }
10840   }
10841 
10842   // Sort out parameter sequence.
10843   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
10844   StringRef Prefix = "_ZGV";
10845   // Generate simdlen from user input (if any).
10846   if (UserVLEN) {
10847     if (ISA == 's') {
10848       // SVE generates only a masked function.
10849       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10850                            OutputBecomesInput, Fn);
10851     } else {
10852       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10853       // Advanced SIMD generates one or two functions, depending on
10854       // the `[not]inbranch` clause.
10855       switch (State) {
10856       case OMPDeclareSimdDeclAttr::BS_Undefined:
10857         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10858                              OutputBecomesInput, Fn);
10859         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10860                              OutputBecomesInput, Fn);
10861         break;
10862       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10863         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10864                              OutputBecomesInput, Fn);
10865         break;
10866       case OMPDeclareSimdDeclAttr::BS_Inbranch:
10867         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10868                              OutputBecomesInput, Fn);
10869         break;
10870       }
10871     }
10872   } else {
10873     // If no user simdlen is provided, follow the AAVFABI rules for
10874     // generating the vector length.
10875     if (ISA == 's') {
10876       // SVE, section 3.4.1, item 1.
10877       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
10878                            OutputBecomesInput, Fn);
10879     } else {
10880       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10881       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
10882       // two vector names depending on the use of the clause
10883       // `[not]inbranch`.
10884       switch (State) {
10885       case OMPDeclareSimdDeclAttr::BS_Undefined:
10886         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10887                                   OutputBecomesInput, Fn);
10888         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10889                                   OutputBecomesInput, Fn);
10890         break;
10891       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10892         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10893                                   OutputBecomesInput, Fn);
10894         break;
10895       case OMPDeclareSimdDeclAttr::BS_Inbranch:
10896         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10897                                   OutputBecomesInput, Fn);
10898         break;
10899       }
10900     }
10901   }
10902 }
10903 
10904 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
10905                                               llvm::Function *Fn) {
10906   ASTContext &C = CGM.getContext();
10907   FD = FD->getMostRecentDecl();
10908   // Map params to their positions in function decl.
10909   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
10910   if (isa<CXXMethodDecl>(FD))
10911     ParamPositions.try_emplace(FD, 0);
10912   unsigned ParamPos = ParamPositions.size();
10913   for (const ParmVarDecl *P : FD->parameters()) {
10914     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
10915     ++ParamPos;
10916   }
10917   while (FD) {
10918     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
10919       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
10920       // Mark uniform parameters.
10921       for (const Expr *E : Attr->uniforms()) {
10922         E = E->IgnoreParenImpCasts();
10923         unsigned Pos;
10924         if (isa<CXXThisExpr>(E)) {
10925           Pos = ParamPositions[FD];
10926         } else {
10927           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10928                                 ->getCanonicalDecl();
10929           Pos = ParamPositions[PVD];
10930         }
10931         ParamAttrs[Pos].Kind = Uniform;
10932       }
10933       // Get alignment info.
10934       auto NI = Attr->alignments_begin();
10935       for (const Expr *E : Attr->aligneds()) {
10936         E = E->IgnoreParenImpCasts();
10937         unsigned Pos;
10938         QualType ParmTy;
10939         if (isa<CXXThisExpr>(E)) {
10940           Pos = ParamPositions[FD];
10941           ParmTy = E->getType();
10942         } else {
10943           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10944                                 ->getCanonicalDecl();
10945           Pos = ParamPositions[PVD];
10946           ParmTy = PVD->getType();
10947         }
10948         ParamAttrs[Pos].Alignment =
10949             (*NI)
10950                 ? (*NI)->EvaluateKnownConstInt(C)
10951                 : llvm::APSInt::getUnsigned(
10952                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
10953                           .getQuantity());
10954         ++NI;
10955       }
10956       // Mark linear parameters.
10957       auto SI = Attr->steps_begin();
10958       auto MI = Attr->modifiers_begin();
10959       for (const Expr *E : Attr->linears()) {
10960         E = E->IgnoreParenImpCasts();
10961         unsigned Pos;
10962         // Rescaling factor needed to compute the linear parameter
10963         // value in the mangled name.
10964         unsigned PtrRescalingFactor = 1;
10965         if (isa<CXXThisExpr>(E)) {
10966           Pos = ParamPositions[FD];
10967         } else {
10968           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10969                                 ->getCanonicalDecl();
10970           Pos = ParamPositions[PVD];
10971           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
10972             PtrRescalingFactor = CGM.getContext()
10973                                      .getTypeSizeInChars(P->getPointeeType())
10974                                      .getQuantity();
10975         }
10976         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
10977         ParamAttr.Kind = Linear;
10978         // Assuming a stride of 1, for `linear` without modifiers.
10979         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
10980         if (*SI) {
10981           Expr::EvalResult Result;
10982           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
10983             if (const auto *DRE =
10984                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
10985               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
10986                 ParamAttr.Kind = LinearWithVarStride;
10987                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
10988                     ParamPositions[StridePVD->getCanonicalDecl()]);
10989               }
10990             }
10991           } else {
10992             ParamAttr.StrideOrArg = Result.Val.getInt();
10993           }
10994         }
10995         // If we are using a linear clause on a pointer, we need to
10996         // rescale the value of linear_step with the byte size of the
10997         // pointee type.
10998         if (Linear == ParamAttr.Kind)
10999           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11000         ++SI;
11001         ++MI;
11002       }
11003       llvm::APSInt VLENVal;
11004       SourceLocation ExprLoc;
11005       const Expr *VLENExpr = Attr->getSimdlen();
11006       if (VLENExpr) {
11007         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11008         ExprLoc = VLENExpr->getExprLoc();
11009       }
11010       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11011       if (CGM.getTriple().isX86()) {
11012         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11013       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11014         unsigned VLEN = VLENVal.getExtValue();
11015         StringRef MangledName = Fn->getName();
11016         if (CGM.getTarget().hasFeature("sve"))
11017           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11018                                          MangledName, 's', 128, Fn, ExprLoc);
11019         if (CGM.getTarget().hasFeature("neon"))
11020           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11021                                          MangledName, 'n', 128, Fn, ExprLoc);
11022       }
11023     }
11024     FD = FD->getPreviousDecl();
11025   }
11026 }
11027 
11028 namespace {
11029 /// Cleanup action for doacross support.
11030 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11031 public:
11032   static const int DoacrossFinArgs = 2;
11033 
11034 private:
11035   llvm::FunctionCallee RTLFn;
11036   llvm::Value *Args[DoacrossFinArgs];
11037 
11038 public:
11039   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11040                     ArrayRef<llvm::Value *> CallArgs)
11041       : RTLFn(RTLFn) {
11042     assert(CallArgs.size() == DoacrossFinArgs);
11043     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11044   }
11045   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11046     if (!CGF.HaveInsertPoint())
11047       return;
11048     CGF.EmitRuntimeCall(RTLFn, Args);
11049   }
11050 };
11051 } // namespace
11052 
/// Emit the doacross-loop initialization for directive \p D: allocates a
/// local array of 'kmp_dim' descriptors (one per entry in \p NumIterations),
/// zero-initializes it, fills in the upper bound and a stride of 1 for each
/// dimension (the lower bound stays 0 from the null initialization), calls
/// __kmpc_doacross_init, and pushes a cleanup that emits the matching
/// __kmpc_doacross_fini on both normal and EH exits from the region.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  // Build the kmp_dim record type once and cache it in KmpDimTy.
  RecordDecl *RD;
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  // Local 'dims' array, zeroed so every 'lo' field starts at 0.
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    // Convert the iteration count to kmp_int64 before storing it.
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Arguments for the deferred __kmpc_doacross_fini call; DoacrossCleanupTy
  // emits it when the cleanup scope is popped.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
11123 
11124 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11125                                           const OMPDependClause *C) {
11126   QualType Int64Ty =
11127       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11128   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11129   QualType ArrayTy = CGM.getContext().getConstantArrayType(
11130       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
11131   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11132   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11133     const Expr *CounterVal = C->getLoopData(I);
11134     assert(CounterVal);
11135     llvm::Value *CntVal = CGF.EmitScalarConversion(
11136         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11137         CounterVal->getExprLoc());
11138     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11139                           /*Volatile=*/false, Int64Ty);
11140   }
11141   llvm::Value *Args[] = {
11142       emitUpdateLocation(CGF, C->getBeginLoc()),
11143       getThreadID(CGF, C->getBeginLoc()),
11144       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
11145   llvm::FunctionCallee RTLFn;
11146   if (C->getDependencyKind() == OMPC_DEPEND_source) {
11147     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11148                                                   OMPRTL___kmpc_doacross_post);
11149   } else {
11150     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
11151     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11152                                                   OMPRTL___kmpc_doacross_wait);
11153   }
11154   CGF.EmitRuntimeCall(RTLFn, Args);
11155 }
11156 
11157 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11158                                llvm::FunctionCallee Callee,
11159                                ArrayRef<llvm::Value *> Args) const {
11160   assert(Loc.isValid() && "Outlined function call location must be valid.");
11161   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11162 
11163   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11164     if (Fn->doesNotThrow()) {
11165       CGF.EmitNounwindRuntimeCall(Fn, Args);
11166       return;
11167     }
11168   }
11169   CGF.EmitRuntimeCall(Callee, Args);
11170 }
11171 
/// Emit a call to the outlined OpenMP function \p OutlinedFn with \p Args.
/// This base implementation forwards directly to emitCall with the same
/// location and arguments; presumably a customization point for
/// target-specific runtimes — confirm against the class declaration.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
11177 
11178 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11179   if (const auto *FD = dyn_cast<FunctionDecl>(D))
11180     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11181       HasEmittedDeclareTargetRegion = true;
11182 }
11183 
/// Return the address of parameter \p NativeParam in the current function.
/// This base implementation ignores \p TargetParam and returns the local
/// address of the native parameter directly (native and target parameters
/// coincide here; presumably device runtimes remap them — confirm in
/// overriding implementations).
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
11189 
11190 namespace {
11191 /// Cleanup action for allocate support.
11192 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
11193 public:
11194   static const int CleanupArgs = 3;
11195 
11196 private:
11197   llvm::FunctionCallee RTLFn;
11198   llvm::Value *Args[CleanupArgs];
11199 
11200 public:
11201   OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
11202                        ArrayRef<llvm::Value *> CallArgs)
11203       : RTLFn(RTLFn) {
11204     assert(CallArgs.size() == CleanupArgs &&
11205            "Size of arguments does not match.");
11206     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11207   }
11208   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11209     if (!CGF.HaveInsertPoint())
11210       return;
11211     CGF.EmitRuntimeCall(RTLFn, Args);
11212   }
11213 };
11214 } // namespace
11215 
/// If \p VD carries an OMPAllocateDeclAttr requesting a non-default
/// allocator, allocate its storage via __kmpc_alloc, push a cleanup that
/// emits the matching __kmpc_free, and return the typed address. Returns
/// Address::invalid() when default (stack) allocation should be used.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return Address::invalid();
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  if ((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
       AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
      !AA->getAllocator())
    return Address::invalid();
  llvm::Value *Size;
  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
  if (CVD->getType()->isVariablyModifiedType()) {
    // Variably-modified types: the size is a runtime value.
    Size = CGF.getTypeSize(CVD->getType());
    // Align the size: ((size + align - 1) / align) * align
    Size = CGF.Builder.CreateNUWAdd(
        Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
  } else {
    // Statically-sized types: round the size up to the alignment now.
    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
    Size = CGM.getSize(Sz.alignTo(Align));
  }
  llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
  assert(AA->getAllocator() &&
         "Expected allocator expression for non-default allocator.");
  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
  // According to the standard, the original allocator type is a enum (integer).
  // Convert to pointer type, if required.
  if (Allocator->getType()->isIntegerTy())
    Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
  else if (Allocator->getType()->isPointerTy())
    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
                                                                CGM.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  // Allocate the raw (void*) storage: __kmpc_alloc(gtid, size, allocator).
  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, getName({CVD->getName(), ".void.addr"}));
  // Free the allocation on both normal and EH exits from the scope.
  llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
                                                              Allocator};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_free);

  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                                llvm::makeArrayRef(FiniArgs));
  // Cast the raw allocation to a pointer to the variable's type.
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr,
      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
      getName({CVD->getName(), ".addr"}));
  return Address(Addr, Align);
}
11272 
11273 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11274     CodeGenModule &CGM, const OMPLoopDirective &S)
11275     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11276   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11277   if (!NeedToPush)
11278     return;
11279   NontemporalDeclsSet &DS =
11280       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11281   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11282     for (const Stmt *Ref : C->private_refs()) {
11283       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11284       const ValueDecl *VD;
11285       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11286         VD = DRE->getDecl();
11287       } else {
11288         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11289         assert((ME->isImplicitCXXThis() ||
11290                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11291                "Expected member of current class.");
11292         VD = ME->getMemberDecl();
11293       }
11294       DS.insert(VD);
11295     }
11296   }
11297 }
11298 
11299 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11300   if (!NeedToPush)
11301     return;
11302   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11303 }
11304 
11305 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11306   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11307 
11308   return llvm::any_of(
11309       CGM.getOpenMPRuntime().NontemporalDeclsStack,
11310       [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
11311 }
11312 
11313 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11314     const OMPExecutableDirective &S,
11315     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11316     const {
11317   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11318   // Vars in target/task regions must be excluded completely.
11319   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11320       isOpenMPTaskingDirective(S.getDirectiveKind())) {
11321     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11322     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11323     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11324     for (const CapturedStmt::Capture &Cap : CS->captures()) {
11325       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11326         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11327     }
11328   }
11329   // Exclude vars in private clauses.
11330   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11331     for (const Expr *Ref : C->varlists()) {
11332       if (!Ref->getType()->isScalarType())
11333         continue;
11334       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11335       if (!DRE)
11336         continue;
11337       NeedToCheckForLPCs.insert(DRE->getDecl());
11338     }
11339   }
11340   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11341     for (const Expr *Ref : C->varlists()) {
11342       if (!Ref->getType()->isScalarType())
11343         continue;
11344       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11345       if (!DRE)
11346         continue;
11347       NeedToCheckForLPCs.insert(DRE->getDecl());
11348     }
11349   }
11350   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11351     for (const Expr *Ref : C->varlists()) {
11352       if (!Ref->getType()->isScalarType())
11353         continue;
11354       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11355       if (!DRE)
11356         continue;
11357       NeedToCheckForLPCs.insert(DRE->getDecl());
11358     }
11359   }
11360   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11361     for (const Expr *Ref : C->varlists()) {
11362       if (!Ref->getType()->isScalarType())
11363         continue;
11364       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11365       if (!DRE)
11366         continue;
11367       NeedToCheckForLPCs.insert(DRE->getDecl());
11368     }
11369   }
11370   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11371     for (const Expr *Ref : C->varlists()) {
11372       if (!Ref->getType()->isScalarType())
11373         continue;
11374       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11375       if (!DRE)
11376         continue;
11377       NeedToCheckForLPCs.insert(DRE->getDecl());
11378     }
11379   }
11380   for (const Decl *VD : NeedToCheckForLPCs) {
11381     for (const LastprivateConditionalData &Data :
11382          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
11383       if (Data.DeclToUniqueName.count(VD) > 0) {
11384         if (!Data.Disabled)
11385           NeedToAddForLPCsAsDisabled.insert(VD);
11386         break;
11387       }
11388     }
11389   }
11390 }
11391 
/// Enters a lastprivate(conditional) region: if the directive has at least
/// one lastprivate clause with the 'conditional' modifier (OpenMP >= 5.0),
/// push a new entry on LastprivateConditionalStack mapping each listed
/// declaration to a unique name; the matching pop happens in the destructor.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    // Give every conditional lastprivate decl a unique "pl_cond" name; the
    // name later identifies the internal globals created for it.
    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  // Remember the loop iteration variable and the function this region was
  // emitted in; both are consulted when an update is detected.
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}
11423 
/// Enters a region that must not participate in lastprivate conditional
/// analysis: if an enclosing enabled region tracks a variable that \p S
/// privatizes or captures, push a Disabled stack entry listing those
/// variables so references inside \p S are ignored.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    // Disabled entries only need decl membership; unique names stay empty.
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}
11442 
/// Factory for the "disable" form of the RAII: delegates to the private
/// constructor that may push a Disabled entry for the declarations
/// privatized/captured by \p S.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
11448 
11449 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
11450   if (CGM.getLangOpts().OpenMP < 50)
11451     return;
11452   if (Action == ActionToDo::DisableLastprivateConditional) {
11453     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11454            "Expected list of disabled private vars.");
11455     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11456   }
11457   if (Action == ActionToDo::PushAsLastprivateConditional) {
11458     assert(
11459         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11460         "Expected list of lastprivate conditional vars.");
11461     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11462   }
11463 }
11464 
/// Creates (or reuses) the per-function helper aggregate for a lastprivate
/// conditional variable \p VD: an implicit record holding the variable's
/// value and a char "Fired" flag. Resets the flag to 0 and returns the
/// address of the value field, which serves as the private copy.
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  // Lazily create the per-function map of helper records.
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // First use in this function: build { <VD's type>, char Fired } and
    // allocate a local temporary of that type.
    // NOTE(review): "lasprivate" looks like a typo for "lastprivate", but
    // the name shows up in emitted IR type names - confirm no tests match
    // it before renaming.
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    // Reuse the helper previously created for this (function, decl) pair.
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // Fired = 0: no conditional update has happened yet.
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  // The variable itself lives in the first field of the helper record.
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}
11499 
11500 namespace {
11501 /// Checks if the lastprivate conditional variable is referenced in LHS.
11502 class LastprivateConditionalRefChecker final
11503     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
11504   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
11505   const Expr *FoundE = nullptr;
11506   const Decl *FoundD = nullptr;
11507   StringRef UniqueDeclName;
11508   LValue IVLVal;
11509   llvm::Function *FoundFn = nullptr;
11510   SourceLocation Loc;
11511 
11512 public:
11513   bool VisitDeclRefExpr(const DeclRefExpr *E) {
11514     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11515          llvm::reverse(LPM)) {
11516       auto It = D.DeclToUniqueName.find(E->getDecl());
11517       if (It == D.DeclToUniqueName.end())
11518         continue;
11519       if (D.Disabled)
11520         return false;
11521       FoundE = E;
11522       FoundD = E->getDecl()->getCanonicalDecl();
11523       UniqueDeclName = It->second;
11524       IVLVal = D.IVLVal;
11525       FoundFn = D.Fn;
11526       break;
11527     }
11528     return FoundE == E;
11529   }
11530   bool VisitMemberExpr(const MemberExpr *E) {
11531     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
11532       return false;
11533     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11534          llvm::reverse(LPM)) {
11535       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
11536       if (It == D.DeclToUniqueName.end())
11537         continue;
11538       if (D.Disabled)
11539         return false;
11540       FoundE = E;
11541       FoundD = E->getMemberDecl()->getCanonicalDecl();
11542       UniqueDeclName = It->second;
11543       IVLVal = D.IVLVal;
11544       FoundFn = D.Fn;
11545       break;
11546     }
11547     return FoundE == E;
11548   }
11549   bool VisitStmt(const Stmt *S) {
11550     for (const Stmt *Child : S->children()) {
11551       if (!Child)
11552         continue;
11553       if (const auto *E = dyn_cast<Expr>(Child))
11554         if (!E->isGLValue())
11555           continue;
11556       if (Visit(Child))
11557         return true;
11558     }
11559     return false;
11560   }
11561   explicit LastprivateConditionalRefChecker(
11562       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
11563       : LPM(LPM) {}
11564   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
11565   getFoundData() const {
11566     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
11567   }
11568 };
11569 } // namespace
11570 
/// Emits the tracking update for a lastprivate conditional variable: two
/// internal globals keep the last stored value (named \p UniqueDeclName) and
/// the iteration at which it was stored ("iv" suffix); the new value wins if
/// its iteration number is >= the recorded one. Unless compiling in OpenMP
/// SIMD-only mode the compare-and-store runs inside a critical region named
/// after the variable so threads do not race on the globals.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    // Scalar and complex values are supported; aggregates cannot be
    // lastprivate conditional (see llvm_unreachable below).
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
11657 
/// Checks whether \p LHS references a lastprivate conditional variable of an
/// enclosing region and, if so, records the (possible) update: in the same
/// function via the internal tracking globals, or - for inner parallel
/// regions emitted into a different function - by atomically setting the
/// Fired flag in the helper record created by emitLastprivateConditionalInit.
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    // The private copy is the first field of the helper record, so casting
    // its address to the record type gives access to the Fired flag.
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(CGF),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    // Unordered, volatile atomic store: the flag is later inspected in the
    // outer function by checkAndEmitSharedLastprivateConditional.
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}
11700 
11701 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
11702     CodeGenFunction &CGF, const OMPExecutableDirective &D,
11703     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
11704   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11705     return;
11706   auto Range = llvm::reverse(LastprivateConditionalStack);
11707   auto It = llvm::find_if(
11708       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
11709   if (It == Range.end() || It->Fn != CGF.CurFn)
11710     return;
11711   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
11712   assert(LPCI != LastprivateConditionalToTypes.end() &&
11713          "Lastprivates must be registered already.");
11714   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11715   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
11716   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
11717   for (const auto &Pair : It->DeclToUniqueName) {
11718     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
11719     if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
11720       continue;
11721     auto I = LPCI->getSecond().find(Pair.first);
11722     assert(I != LPCI->getSecond().end() &&
11723            "Lastprivate must be rehistered already.");
11724     // bool Cmp = priv_a.Fired != 0;
11725     LValue BaseLVal = std::get<3>(I->getSecond());
11726     LValue FiredLVal =
11727         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
11728     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
11729     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
11730     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
11731     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
11732     // if (Cmp) {
11733     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
11734     CGF.EmitBlock(ThenBB);
11735     Address Addr = CGF.GetAddrOfLocalVar(VD);
11736     LValue LVal;
11737     if (VD->getType()->isReferenceType())
11738       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
11739                                            AlignmentSource::Decl);
11740     else
11741       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
11742                                 AlignmentSource::Decl);
11743     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
11744                                      D.getBeginLoc());
11745     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
11746     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
11747     // }
11748   }
11749 }
11750 
11751 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
11752     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
11753     SourceLocation Loc) {
11754   if (CGF.getLangOpts().OpenMP < 50)
11755     return;
11756   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
11757   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
11758          "Unknown lastprivate conditional variable.");
11759   StringRef UniqueName = It->second;
11760   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
11761   // The variable was not updated in the region - exit.
11762   if (!GV)
11763     return;
11764   LValue LPLVal = CGF.MakeAddrLValue(
11765       GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
11766   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
11767   CGF.EmitStoreOfScalar(Res, PrivLVal);
11768 }
11769 
// CGOpenMPSIMDRuntime: outlining of parallel/teams/task regions requires
// the full OpenMP runtime and must never be requested in SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11789 
// Region call and synchronization entry points (parallel, critical, master,
// taskyield, taskgroup, single, ordered, barrier): all require runtime
// library calls and are unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11844 
// Worksharing-loop scheduling callbacks (dispatch/static init and finish,
// distribute init, dispatch next, num_threads/proc_bind): all map to runtime
// library calls and are unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11896 
// Threadprivate storage and flush: require the runtime's thread-local
// bookkeeping; unreachable in SIMD-only mode.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11921 
// Task and taskloop dispatch: require the tasking runtime; unreachable in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11937 
// Reductions are the one construct handled here: only the "simple" form is
// expected (asserted), and it is delegated to the shared CGOpenMPRuntime
// implementation, which emits it without runtime calls.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
11946 
// Task reductions and taskwait: require the tasking runtime; unreachable in
// SIMD-only mode.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11977 
// Cancellation: requires runtime support; unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11989 
// Target (offloading) codegen: never performed in SIMD-only mode; all
// offload entry points are unreachable.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12014 
// Always returns false in SIMD-only mode: no target codegen is performed
// for any global here.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
12018 
// Teams, target-data and doacross constructs: all require runtime support;
// unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12056 
12057 const VarDecl *
12058 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
12059                                         const VarDecl *NativeParam) const {
12060   llvm_unreachable("Not supported in SIMD-only mode");
12061 }
12062 
12063 Address
12064 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
12065                                          const VarDecl *NativeParam,
12066                                          const VarDecl *TargetParam) const {
12067   llvm_unreachable("Not supported in SIMD-only mode");
12068 }
12069