1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/Attr.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/OpenMPClause.h"
21 #include "clang/AST/StmtOpenMP.h"
22 #include "clang/AST/StmtVisitor.h"
23 #include "clang/Basic/BitmaskEnum.h"
24 #include "clang/Basic/FileManager.h"
25 #include "clang/Basic/OpenMPKinds.h"
26 #include "clang/Basic/SourceManager.h"
27 #include "clang/CodeGen/ConstantInitBuilder.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/SetOperations.h"
30 #include "llvm/ADT/StringExtras.h"
31 #include "llvm/Bitcode/BitcodeReader.h"
32 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
33 #include "llvm/IR/Constants.h"
34 #include "llvm/IR/DerivedTypes.h"
35 #include "llvm/IR/GlobalValue.h"
36 #include "llvm/IR/Value.h"
37 #include "llvm/Support/AtomicOrdering.h"
38 #include "llvm/Support/Format.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include <cassert>
41 #include <numeric>
42 
43 using namespace clang;
44 using namespace CodeGen;
45 using namespace llvm::omp;
46 
47 namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Construct region info backed by an explicit captured statement \p CS.
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Construct region info without a captured statement (used for regions
  /// that reuse the captures of an enclosing region, e.g. inlined regions).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit a task-switching point for untied tasks. No-op by default;
  /// overridden by task-region subclasses.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// Return the kind of this region (parallel/task/inlined/target).
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// Return the OpenMP directive kind this region was created for.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// Return true if a 'cancel' directive may appear inside this region.
  bool hasCancel() const { return HasCancel; }

  /// LLVM-style RTTI: all OpenMP region infos share the CR_OpenMP kind.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  /// What flavor of region this is (see CGOpenMPRegionKind).
  CGOpenMPRegionKind RegionKind;
  /// Callback that emits the body of the region.
  RegionCodeGenTy CodeGen;
  /// Directive that created this region.
  OpenMPDirectiveKind Kind;
  /// Whether 'cancel' may appear inside the region.
  bool HasCancel;
};
109 
/// API for captured statement code generation in OpenMP constructs.
/// Used for standalone 'parallel' regions that are outlined into a helper
/// function; the global thread id is passed in via \p ThreadIDVar.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// RTTI: matches OpenMP region infos of ParallelOutlinedRegion kind.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name of the outlined helper function.
  StringRef HelperName;
};
142 
/// API for captured statement code generation in OpenMP constructs.
/// Used for standalone 'task' regions outlined into a helper function.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action that emits the switching machinery for untied tasks:
  /// a switch over the task part-id that allows resuming the task body at
  /// the point where it was previously suspended.
  class UntiedTaskActionTy final : public PrePostActionTy {
    /// True if the task is untied (note: constructed from the negation of
    /// the 'Tied' constructor argument).
    bool Untied;
    /// Variable holding the current part id of the task.
    const VarDecl *PartIDVar;
    /// Codegen callback run at each untied switching point.
    const RegionCodeGenTy UntiedCodeGen;
    /// The switch instruction dispatching on the part id; cases are added
    /// lazily as switching points are emitted.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        // Default destination: done block, which just returns.
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        // Case 0: initial entry into the task body.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        // Record the next part id so re-entry resumes after this point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        // Suspend: leave the function through cleanups...
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        // ...and add a resume case targeting the block that continues here.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of task parts emitted so far (== number of switch cases).
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  /// Delegate untied switching-point emission to the stored action.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  /// RTTI: matches OpenMP region infos of TaskOutlinedRegion kind.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
231 
/// API for inlined captured statement code generation in OpenMP
/// constructs. Most queries are forwarded to the enclosing (outer) region
/// info when one exists; otherwise the original, non-captured entities are
/// used directly.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  // Store the context parameter value in the outer region info.
  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  /// Field capturing 'this' in the outer region, if any.
  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  /// Forward untied task switching to the outer region, if any.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  /// The captured-statement info that was active before this inlined region.
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  /// RTTI: matches OpenMP region infos of InlinedRegion kind.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to an OpenMP region info, or null if it is not one.
  CGOpenMPRegionInfo *OuterRegionInfo;
};
314 
/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of
/// the captured fields. The name of the target region has to be unique in a
/// given application so it is provided by the client, because only the client
/// has the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// RTTI: matches OpenMP region infos of TargetRegion kind.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Unique, client-provided name of the target region helper.
  StringRef HelperName;
};
343 
/// Placeholder region-codegen callback for contexts that must never emit a
/// statement body (see CGOpenMPInnerExprInfo).
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      // Locals and parameters are already local; only globals need
      // privatization here.
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  /// Never matched by RTTI: instances are only used transiently.
  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
406 
/// RAII for emitting code of OpenMP constructs. On construction it installs a
/// CGOpenMPInlinedRegionInfo as the active CapturedStmtInfo and stashes the
/// lambda/block capture state; the destructor restores everything.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  /// Saved lambda capture map (swapped with CGF's on entry and exit).
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  /// Saved lambda 'this' capture field.
  FieldDecl *LambdaThisCaptureField = nullptr;
  /// Saved block info.
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel)
      : CGF(CGF) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    // Clear lambda/block capture state while inside the inlined region so
    // variable references resolve through the OpenMP capture machinery.
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    LambdaThisCaptureField = CGF.LambdaThisCaptureField;
    CGF.LambdaThisCaptureField = nullptr;
    BlockInfo = CGF.BlockInfo;
    CGF.BlockInfo = nullptr;
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    CGF.LambdaThisCaptureField = LambdaThisCaptureField;
    CGF.BlockInfo = BlockInfo;
  }
};
443 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumeric elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
472 
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

/// Reserved device ids passed to the offloading runtime.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
498 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
539 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
571 
572 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
573 /// region.
574 class CleanupTy final : public EHScopeStack::Cleanup {
575   PrePostActionTy *Action;
576 
577 public:
578   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
579   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
580     if (!CGF.HaveInsertPoint())
581       return;
582     Action->Exit(CGF);
583   }
584 };
585 
586 } // anonymous namespace
587 
// Invoke the region codegen callback inside a fresh cleanups scope. If a
// pre/post action is attached, its Exit hook is registered as a cleanup so it
// also runs on exceptional paths; otherwise a trivial action is passed.
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}
598 
/// Check if the reduction operation is a call to a user-defined reduction
/// (UDR) combiner — i.e. a CallExpr whose callee is an OpaqueValueExpr
/// wrapping a reference to an OMPDeclareReductionDecl — and if so return
/// the UDR decl used for reduction; otherwise return nullptr.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}
611 
/// Emit initialization of \p Private using the initializer of the
/// declare-reduction decl \p DRD. If \p DRD has an explicit initializer,
/// \p InitOp (a call to the UDR initializer) is emitted with the LHS/RHS
/// placeholders privatized to \p Private / \p Original; otherwise \p Private
/// is initialized from a zero-initialized global of type \p Ty.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // Call the user-defined initializer: map the 'omp_priv'/'omp_orig'
    // placeholder decls onto the private and original addresses.
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // Substitute the initializer function for the opaque callee, then emit
    // the call for its side effects.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No explicit initializer: materialize a private constant holding the
    // null value of Ty and copy it into Private.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress(CGF));
      break;
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
663 
/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, each element is initialized via
/// the declare-reduction initializer (\p Init is the reduction op); otherwise
/// \p Init is an ordinary initializer expression.
/// \param Init Initial expression of array.
/// \param DRD Declare-reduction declaration whose initializer is used; may be
/// null, in which case no source array is walked.
/// \param SrcAddr Address of the original array (only used when \p DRD is
/// non-null).
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source (if any) and destination element.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the value name says "dest" but this advances the source
    // pointer; IR value names have no semantic effect.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
752 
/// Emits the lvalue for the (lower-bound) shared reduction item \p E captured
/// in the current region by delegating to the generic shared-lvalue emission.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}
756 
757 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
758                                             const Expr *E) {
759   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
760     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
761   return LValue();
762 }
763 
/// Emits element-wise initialization of the private array copy for reduction
/// item \p N, choosing between the 'declare reduction' initializer and the
/// private variable's own default initializer.
void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  // Use the user-defined reduction initializer when one is declared, or when
  // the private copy has no initializer of its own; otherwise fall back to
  // the private variable's initializer.
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedLVal.getAddress(CGF));
}
780 
781 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
782                                    ArrayRef<const Expr *> Origs,
783                                    ArrayRef<const Expr *> Privates,
784                                    ArrayRef<const Expr *> ReductionOps) {
785   ClausesData.reserve(Shareds.size());
786   SharedAddresses.reserve(Shareds.size());
787   Sizes.reserve(Shareds.size());
788   BaseDecls.reserve(Shareds.size());
789   const auto *IOrig = Origs.begin();
790   const auto *IPriv = Privates.begin();
791   const auto *IRed = ReductionOps.begin();
792   for (const Expr *Ref : Shareds) {
793     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
794     std::advance(IOrig, 1);
795     std::advance(IPriv, 1);
796     std::advance(IRed, 1);
797   }
798 }
799 
800 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
801   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
802          "Number of generated lvalues must be exactly N.");
803   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
804   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
805   SharedAddresses.emplace_back(First, Second);
806   if (ClausesData[N].Shared == ClausesData[N].Ref) {
807     OrigAddresses.emplace_back(First, Second);
808   } else {
809     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
810     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
811     OrigAddresses.emplace_back(First, Second);
812   }
813 }
814 
/// Computes the size of reduction item \p N and records it in Sizes: for
/// constant-size items a (size-in-chars, nullptr) pair; for variably modified
/// items / array sections a (size-in-chars, element-count) pair. For VLA
/// private types the element count is also bound to the VLA size expression
/// so the type can be emitted.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-size item: the runtime element count is not needed.
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count = (UB - LB) + 1: section bounds are inclusive.
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole variably modified type: derive the element count from the total
    // byte size.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the computed element count to the VLA size expression and re-emit
  // the private type with that binding in place.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
851 
/// Re-emits the variably modified private type of reduction item \p N using a
/// pre-computed element count \p Size. No-op for constant-size items, where
/// \p Size must be null.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // Bind \p Size to the VLA size expression while emitting the type.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
870 
/// Initializes the private copy of reduction item \p N at \p PrivateAddr from
/// the shared variable \p SharedLVal. Arrays are initialized element-wise;
/// scalars via the user-defined 'declare reduction' initializer, via
/// \p DefaultInit, or via the private variable's own initializer.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Cast the private storage to the memory type of the private variable.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  // Rebuild the shared lvalue with the type/base/TBAA info recorded for this
  // item when it was first emitted.
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Array reduction item: initialize every element.
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // User-defined reduction initializer takes precedence over the private
    // variable's default initializer.
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // Caller-provided default init did not handle it; emit the private
    // variable's own (non-trivial) initializer.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
901 
902 bool ReductionCodeGen::needCleanups(unsigned N) {
903   const auto *PrivateVD =
904       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
905   QualType PrivateType = PrivateVD->getType();
906   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
907   return DTorKind != QualType::DK_none;
908 }
909 
910 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
911                                     Address PrivateAddr) {
912   const auto *PrivateVD =
913       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
914   QualType PrivateType = PrivateVD->getType();
915   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
916   if (needCleanups(N)) {
917     PrivateAddr = CGF.Builder.CreateElementBitCast(
918         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
919     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
920   }
921 }
922 
/// Dereferences pointers/references in \p BaseLV (starting at \p BaseTy)
/// until the type matches \p ElTy, then returns the resulting lvalue with its
/// address element-cast to the memory type of \p ElTy.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      // Reference type: load through the reference instead.
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  // Preserve the base info / TBAA of the last loaded lvalue.
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
942 
/// Casts \p Addr to the shape expected for a base of type \p BaseTy whose
/// pointee chain terminates at \p ElTy. For multi-level pointer/reference
/// bases it materializes a chain of temporaries, stores \p Addr into the
/// innermost one, and returns the outermost temporary; otherwise returns
/// \p Addr cast to \p BaseLVType with alignment \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();        // Innermost temporary so far.
  Address TopTmp = Address::invalid();     // Temporary of the previous level.
  Address MostTopTmp = Address::invalid(); // Outermost temporary (returned).
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // One temporary per indirection level; link each into its parent.
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    // Store the adjusted pointer into the innermost temporary and hand back
    // the outermost one so callers can dereference the full chain.
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
970 
971 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
972   const VarDecl *OrigVD = nullptr;
973   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
974     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
975     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
976       Base = TempOASE->getBase()->IgnoreParenImpCasts();
977     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
978       Base = TempASE->getBase()->IgnoreParenImpCasts();
979     DE = cast<DeclRefExpr>(Base);
980     OrigVD = cast<VarDecl>(DE->getDecl());
981   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
982     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
983     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
984       Base = TempASE->getBase()->IgnoreParenImpCasts();
985     DE = cast<DeclRefExpr>(Base);
986     OrigVD = cast<VarDecl>(DE->getDecl());
987   }
988   return OrigVD;
989 }
990 
/// Adjusts \p PrivateAddr for reduction item \p N when the reduction clause
/// names an array section/subscript: offsets the private storage by the
/// distance between the base variable and the section start so that indexing
/// the private copy mirrors indexing the original. The base declaration is
/// recorded in BaseDecls either way.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    // Lvalue of the whole variable the section refers into.
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    // Distance (in elements) from the section start back to the base.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress(CGF).getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    // Re-wrap the adjusted pointer in the shape the base variable expects.
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1016 
1017 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1018   const OMPDeclareReductionDecl *DRD =
1019       getReductionInit(ClausesData[N].ReductionOp);
1020   return DRD && DRD->getInitializer();
1021 }
1022 
/// Loads the thread-id lvalue by dereferencing the outlined function's
/// thread-id pointer parameter.
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}
1028 
/// Emits the body of an OpenMP outlined region inside a terminate scope,
/// enforcing the structured-block single-entry/single-exit rule.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1041 
/// For tasks the thread id is passed by value, so the variable's local
/// address can be used directly — no pointer load is needed.
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}
1048 
1049 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1050                                        QualType FieldTy) {
1051   auto *Field = FieldDecl::Create(
1052       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1053       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1054       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1055   Field->setAccess(AS_public);
1056   DC->addDecl(Field);
1057   return Field;
1058 }
1059 
/// Builds the ident_t record type used to pass source-location info to the
/// OpenMP runtime, the kmp_critical_name lock type, initializes the
/// OpenMPIRBuilder types, and loads any offloading metadata.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OffloadEntriesInfoManager(CGM) {
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("ident_t");
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  RD->startDefinition();
  // reserved_1
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // flags
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_2
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_3
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // psource
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  RD->completeDefinition();
  IdentQTy = C.getRecordType(RD);
  IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
  // kmp_critical_name is an array of 8 int32 in the runtime's kmp.h.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  llvm::omp::types::initializeTypes(CGM.getModule());
  loadOffloadInfoMetadata();
}
1087 
1088 void CGOpenMPRuntime::clear() {
1089   InternalVars.clear();
1090   // Clean non-target variable declarations possibly used only in debug info.
1091   for (const auto &Data : EmittedNonTargetVariables) {
1092     if (!Data.getValue().pointsToAliveValue())
1093       continue;
1094     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1095     if (!GV)
1096       continue;
1097     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1098       continue;
1099     GV->eraseFromParent();
1100   }
1101 }
1102 
1103 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1104   SmallString<128> Buffer;
1105   llvm::raw_svector_ostream OS(Buffer);
1106   StringRef Sep = FirstSeparator;
1107   for (StringRef Part : Parts) {
1108     OS << Sep << Part;
1109     Sep = Separator;
1110   }
1111   return std::string(OS.str());
1112 }
1113 
/// Emits the outlined function for a 'declare reduction' combiner or
/// initializer: an internal function `void .omp_combiner./.omp_initializer.
/// (Ty *omp_out, Ty *omp_in)` with \p In / \p Out mapped to the two
/// parameters and \p CombinerInitializer (if any) emitted as its body.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // Combiners/initializers are small helpers; force inlining in optimized
    // builds.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // For initializers without a call-style init expression, default-initialize
  // the 'omp_priv' (Out) variable from its own initializer.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1170 
/// Emits (once) the combiner and optional initializer functions for the
/// user-defined reduction \p D and caches them in UDRMap. When called inside
/// a function (\p CGF non-null), records the association with that function
/// in FunctionUDRMap as well.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // For direct-init ('omp_priv = ...'-less) initializers, the init
    // expression is emitted from the private variable itself, so pass null.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
1196 
1197 std::pair<llvm::Function *, llvm::Function *>
1198 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1199   auto I = UDRMap.find(D);
1200   if (I != UDRMap.end())
1201     return I->second;
1202   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1203   return UDRMap.lookup(D);
1204 }
1205 
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  /// Pushes a finalization callback for an OMPD_parallel region onto
  /// \p OMPBuilder (if non-null); the callback routes control through clang's
  /// cleanup machinery toward the cancellation destination.
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI(
        {FiniCB, OMPD_parallel, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  /// Pops the callback pushed in the constructor (if a builder was given).
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  // Builder the finalization callback was pushed onto; null when no builder
  // is in use.
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
1251 
1252 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1253     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1254     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1255     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1256   assert(ThreadIDVar->getType()->isPointerType() &&
1257          "thread id variable must be of type kmp_int32 *");
1258   CodeGenFunction CGF(CGM, true);
1259   bool HasCancel = false;
1260   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1261     HasCancel = OPD->hasCancel();
1262   else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1263     HasCancel = OPD->hasCancel();
1264   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1265     HasCancel = OPSD->hasCancel();
1266   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1267     HasCancel = OPFD->hasCancel();
1268   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1269     HasCancel = OPFD->hasCancel();
1270   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1271     HasCancel = OPFD->hasCancel();
1272   else if (const auto *OPFD =
1273                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1274     HasCancel = OPFD->hasCancel();
1275   else if (const auto *OPFD =
1276                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1277     HasCancel = OPFD->hasCancel();
1278 
1279   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1280   //       parallel region to make cancellation barriers work properly.
1281   llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder();
1282   PushAndPopStackRAII PSR(OMPBuilder, CGF, HasCancel);
1283   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1284                                     HasCancel, OutlinedHelperName);
1285   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1286   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1287 }
1288 
1289 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1290     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1291     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1292   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1293   return emitParallelOrTeamsOutlinedFunction(
1294       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1295 }
1296 
1297 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1298     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1299     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1300   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1301   return emitParallelOrTeamsOutlinedFunction(
1302       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1303 }
1304 
/// Builds the outlined function for a 'task' or 'taskloop' region. For untied
/// tasks, \p UntiedCodeGen re-enqueues the task via __kmpc_omp_task between
/// task parts, and the resulting number of parts is reported back through
/// \p NumberOfParts.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Re-schedules the remainder of an untied task: loads the task descriptor
  // from TaskTVar and calls __kmpc_omp_task with it.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Determine whether this task-like region may execute a 'cancel'.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // The action tracks how many parts were generated; only relevant for
  // untied tasks.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1351 
1352 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1353                              const RecordDecl *RD, const CGRecordLayout &RL,
1354                              ArrayRef<llvm::Constant *> Data) {
1355   llvm::StructType *StructTy = RL.getLLVMType();
1356   unsigned PrevIdx = 0;
1357   ConstantInitBuilder CIBuilder(CGM);
1358   auto DI = Data.begin();
1359   for (const FieldDecl *FD : RD->fields()) {
1360     unsigned Idx = RL.getLLVMFieldNo(FD);
1361     // Fill the alignment.
1362     for (unsigned I = PrevIdx; I < Idx; ++I)
1363       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1364     PrevIdx = Idx + 1;
1365     Fields.add(*DI);
1366     ++DI;
1367   }
1368 }
1369 
/// Creates a global variable of record type \p Ty initialized field-by-field
/// from \p Data (padding zero-filled); the extra arguments \p Args are
/// forwarded to ConstantInitBuilder's finishAndCreateGlobal (e.g. linkage).
template <class... As>
static llvm::GlobalVariable *
createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
                   ArrayRef<llvm::Constant *> Data, const Twine &Name,
                   As &&... Args) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantInitBuilder CIBuilder(CGM);
  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  return Fields.finishAndCreateGlobal(
      Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
      std::forward<As>(Args)...);
}
1384 
/// Appends a constant struct of record type \p Ty, built from \p Data with
/// zero-filled padding, to the aggregate being constructed by \p Parent
/// (a ConstantInitBuilder-style aggregate builder).
template <typename T>
static void
createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
                                         ArrayRef<llvm::Constant *> Data,
                                         T &Parent) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  Fields.finishAndAddTo(Parent);
}
1396 
/// Returns (creating on first use) the default ident_t object for the given
/// \p Flags / reserved_2 combination. The object carries the generic
/// ";unknown;unknown;0;0;;" psource string used when no real source location
/// is available. Results are cached per flags pair in OpenMPDefaultLocMap.
Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  FlagsTy FlagsKey(Flags, Reserved2Flags);
  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
  if (!Entry) {
    if (!DefaultOpenMPPSource) {
      // Initialize default location for psource field of ident_t structure of
      // all ident_t objects. Format is ";file;function;line;column;;".
      // Taken from
      // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
      DefaultOpenMPPSource =
          CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
      DefaultOpenMPPSource =
          llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
    }

    // Field order matches ident_t: reserved_1, flags, reserved_2, reserved_3,
    // psource.
    llvm::Constant *Data[] = {
        llvm::ConstantInt::getNullValue(CGM.Int32Ty),
        llvm::ConstantInt::get(CGM.Int32Ty, Flags),
        llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
        llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
    llvm::GlobalValue *DefaultOpenMPLocation =
        createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
                           llvm::GlobalValue::PrivateLinkage);
    DefaultOpenMPLocation->setUnnamedAddr(
        llvm::GlobalValue::UnnamedAddr::Global);

    OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
  }
  return Address(Entry, Align);
}
1429 
1430 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1431                                              bool AtCurrentPoint) {
1432   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1433   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1434 
1435   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1436   if (AtCurrentPoint) {
1437     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1438         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1439   } else {
1440     Elem.second.ServiceInsertPt =
1441         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1442     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1443   }
1444 }
1445 
1446 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1447   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1448   if (Elem.second.ServiceInsertPt) {
1449     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1450     Elem.second.ServiceInsertPt = nullptr;
1451     Ptr->eraseFromParent();
1452   }
1453 }
1454 
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  // Return a pointer to an ident_t describing \p Loc. Without debug info a
  // shared default location global is returned; otherwise a per-function
  // ident_t alloca is (lazily) created, initialized from the default, and
  // its psource field is updated with a ";file;function;line;column;;"
  // string built for this location.
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  Address LocValue = Address::invalid();
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, Align);

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // GetOpenMPThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    // Initialize the alloca by copying the default ident_t at the service
    // insertion point (i.e. once, near the function entry).
    if (!Elem.second.ServiceInsertPt)
      setLocThreadIdInsertPt(CGF);
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGF.getTypeSize(IdentQTy));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
  LValue PSource =
      CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));

  // Build the location string once per raw source location and cache it in
  // OpenMPDebugLocMap.
  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      OS2 << FD->getQualifiedNameAsString();
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}
1515 
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  // Return the OpenMP global thread id for the current function, preferring
  // (in order): a value already cached for this function, a load of the
  // thread-id argument of an outlined region, and finally an emitted call to
  // __kmpc_global_thread_num.
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Only reuse the argument when it is safe to load it here: no C++ EH
      // landing pads are possible, or the load happens in the entry block,
      // or the thread-id pointer is defined in the entry/current block.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1573 
1574 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1575   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1576   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1577     clearLocThreadIdInsertPt(CGF);
1578     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1579   }
1580   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1581     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1582       UDRMap.erase(D);
1583     FunctionUDRMap.erase(CGF.CurFn);
1584   }
1585   auto I = FunctionUDMMap.find(CGF.CurFn);
1586   if (I != FunctionUDMMap.end()) {
1587     for(const auto *D : I->second)
1588       UDMMap.erase(D);
1589     FunctionUDMMap.erase(I);
1590   }
1591   LastprivateConditionalToTypes.erase(CGF.CurFn);
1592 }
1593 
1594 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1595   return IdentTy->getPointerTo();
1596 }
1597 
1598 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1599   if (!Kmpc_MicroTy) {
1600     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1601     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1602                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1603     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1604   }
1605   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1606 }
1607 
1608 llvm::FunctionCallee
1609 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
1610   assert((IVSize == 32 || IVSize == 64) &&
1611          "IV size is not compatible with the omp runtime");
1612   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1613                                             : "__kmpc_for_static_init_4u")
1614                                 : (IVSigned ? "__kmpc_for_static_init_8"
1615                                             : "__kmpc_for_static_init_8u");
1616   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1617   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1618   llvm::Type *TypeParams[] = {
1619     getIdentTyPointerTy(),                     // loc
1620     CGM.Int32Ty,                               // tid
1621     CGM.Int32Ty,                               // schedtype
1622     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1623     PtrTy,                                     // p_lower
1624     PtrTy,                                     // p_upper
1625     PtrTy,                                     // p_stride
1626     ITy,                                       // incr
1627     ITy                                        // chunk
1628   };
1629   auto *FnTy =
1630       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1631   return CGM.CreateRuntimeFunction(FnTy, Name);
1632 }
1633 
1634 llvm::FunctionCallee
1635 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1636   assert((IVSize == 32 || IVSize == 64) &&
1637          "IV size is not compatible with the omp runtime");
1638   StringRef Name =
1639       IVSize == 32
1640           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1641           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1642   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1643   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1644                                CGM.Int32Ty,           // tid
1645                                CGM.Int32Ty,           // schedtype
1646                                ITy,                   // lower
1647                                ITy,                   // upper
1648                                ITy,                   // stride
1649                                ITy                    // chunk
1650   };
1651   auto *FnTy =
1652       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1653   return CGM.CreateRuntimeFunction(FnTy, Name);
1654 }
1655 
1656 llvm::FunctionCallee
1657 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1658   assert((IVSize == 32 || IVSize == 64) &&
1659          "IV size is not compatible with the omp runtime");
1660   StringRef Name =
1661       IVSize == 32
1662           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1663           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1664   llvm::Type *TypeParams[] = {
1665       getIdentTyPointerTy(), // loc
1666       CGM.Int32Ty,           // tid
1667   };
1668   auto *FnTy =
1669       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1670   return CGM.CreateRuntimeFunction(FnTy, Name);
1671 }
1672 
1673 llvm::FunctionCallee
1674 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1675   assert((IVSize == 32 || IVSize == 64) &&
1676          "IV size is not compatible with the omp runtime");
1677   StringRef Name =
1678       IVSize == 32
1679           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1680           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1681   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1682   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1683   llvm::Type *TypeParams[] = {
1684     getIdentTyPointerTy(),                     // loc
1685     CGM.Int32Ty,                               // tid
1686     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1687     PtrTy,                                     // p_lower
1688     PtrTy,                                     // p_upper
1689     PtrTy                                      // p_stride
1690   };
1691   auto *FnTy =
1692       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1693   return CGM.CreateRuntimeFunction(FnTy, Name);
1694 }
1695 
1696 /// Obtain information that uniquely identifies a target entry. This
1697 /// consists of the file and device IDs as well as line number associated with
1698 /// the relevant entry source location.
1699 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1700                                      unsigned &DeviceID, unsigned &FileID,
1701                                      unsigned &LineNum) {
1702   SourceManager &SM = C.getSourceManager();
1703 
1704   // The loc should be always valid and have a file ID (the user cannot use
1705   // #pragma directives in macros)
1706 
1707   assert(Loc.isValid() && "Source location is expected to be always valid.");
1708 
1709   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1710   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1711 
1712   llvm::sys::fs::UniqueID ID;
1713   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1714     SM.getDiagnostics().Report(diag::err_cannot_open_file)
1715         << PLoc.getFilename() << EC.message();
1716 
1717   DeviceID = ID.getDevice();
1718   FileID = ID.getFile();
1719   LineNum = PLoc.getLine();
1720 }
1721 
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  // For "declare target link" variables (and "to" variables under unified
  // shared memory), return the address of the "_decl_tgt_ref_ptr"
  // indirection pointer rather than the variable itself; otherwise return
  // an invalid address.
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        // Internal-linkage variables get a file-unique suffix so reference
        // pointers from different TUs do not collide.
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // On the host the pointer is initialized with the variable's address;
      // on the device no initializer is emitted here (presumably it is
      // filled in by the offloading runtime — confirm against the runtime).
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
1760 
1761 llvm::Constant *
1762 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1763   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1764          !CGM.getContext().getTargetInfo().isTLSSupported());
1765   // Lookup the entry, lazily creating it if necessary.
1766   std::string Suffix = getName({"cache", ""});
1767   return getOrCreateInternalVariable(
1768       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1769 }
1770 
1771 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1772                                                 const VarDecl *VD,
1773                                                 Address VDAddr,
1774                                                 SourceLocation Loc) {
1775   if (CGM.getLangOpts().OpenMPUseTLS &&
1776       CGM.getContext().getTargetInfo().isTLSSupported())
1777     return VDAddr;
1778 
1779   llvm::Type *VarTy = VDAddr.getElementType();
1780   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1781                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1782                                                        CGM.Int8PtrTy),
1783                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1784                          getOrCreateThreadPrivateCache(VD)};
1785   return Address(CGF.EmitRuntimeCall(
1786                      llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
1787                          CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1788                      Args),
1789                  VDAddr.getAlignment());
1790 }
1791 
1792 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1793     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1794     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1795   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1796   // library.
1797   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1798   CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
1799                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1800                       OMPLoc);
1801   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1802   // to register constructor/destructor for variable.
1803   llvm::Value *Args[] = {
1804       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1805       Ctor, CopyCtor, Dtor};
1806   CGF.EmitRuntimeCall(
1807       llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
1808           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1809       Args);
1810 }
1811 
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // Emit the ctor/dtor registration for a threadprivate variable. When no
  // CGF is provided and registration is needed, a standalone init function
  // performing the registration is created and returned; otherwise the
  // registration is emitted inline and nullptr is returned. Nothing is
  // emitted for the TLS-based implementation.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Emit at most once per variable (keyed by mangled name).
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      // The ctor receives the destination address as a void* argument,
      // initializes it in place...
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // ...and returns that same address.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      // No ctor required: pass a typed null function pointer to the runtime.
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      // No dtor required: pass a typed null function pointer to the runtime.
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // Not inside function codegen: create a dedicated global init function
      // that performs the registration at startup and return it.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1931 
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // Emit ctor/dtor offload entries for a declare-target variable and
  // register them with the offload entries manager. Returns
  // OpenMPIsDevice on every path except when there is no offloading at all
  // (no target triples and not a device compile), where it returns false.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // No entries are needed for non-declare-target variables, "link"
  // variables, or "to" variables under unified shared memory.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Emit the entries at most once per variable (keyed by mangled name).
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Use an artificial location for the ctor body.
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the ctor alive even though nothing references it directly.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // On the host, emit a dummy private global that only serves as the
      // unique ID of the ctor entry in the offload tables.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the dtor alive even though nothing references it directly.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // On the host, emit a dummy private global that only serves as the
      // unique ID of the dtor entry in the offload tables.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2046 
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  // Return the address of a compiler-generated ("artificial") threadprivate
  // variable identified by Name. Uses a real TLS global when the target
  // supports it; otherwise falls back to a __kmpc_threadprivate_cached call.
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    // TLS path: mark the internal global thread-local and return it directly.
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  std::string CacheSuffix = getName({"cache", ""});
  // Runtime-call path. Args: loc, gtid, &var (as void*), size, &cache.
  // The cache is a second internal global derived from the same name.
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  // Cast the returned void* back to a pointer to the variable's type.
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}
2077 
2078 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2079                                    const RegionCodeGenTy &ThenGen,
2080                                    const RegionCodeGenTy &ElseGen) {
2081   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2082 
2083   // If the condition constant folds and can be elided, try to avoid emitting
2084   // the condition and the dead arm of the if/else.
2085   bool CondConstant;
2086   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2087     if (CondConstant)
2088       ThenGen(CGF);
2089     else
2090       ElseGen(CGF);
2091     return;
2092   }
2093 
2094   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2095   // emit the conditional branch.
2096   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2097   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2098   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2099   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2100 
2101   // Emit the 'then' code.
2102   CGF.EmitBlock(ThenBlock);
2103   ThenGen(CGF);
2104   CGF.EmitBranch(ContBlock);
2105   // Emit the 'else' code if present.
2106   // There is no need to emit line number for unconditional branch.
2107   (void)ApplyDebugLocation::CreateEmpty(CGF);
2108   CGF.EmitBlock(ElseBlock);
2109   ElseGen(CGF);
2110   // There is no need to emit line number for unconditional branch.
2111   (void)ApplyDebugLocation::CreateEmpty(CGF);
2112   CGF.EmitBranch(ContBlock);
2113   // Emit the continuation block for code after the if.
2114   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2115 }
2116 
/// Emit a 'parallel' region invocation. With no if-clause (or a true one) the
/// outlined function is handed to __kmpc_fork_call; when the if-clause is
/// false the region is executed serialized on the encountering thread.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  // Parallel path: the runtime forks the team and runs OutlinedFn on it.
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    // Captured variables are appended after the fixed leading arguments.
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
            M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // Serialized path: bracket a direct call to OutlinedFn with
  // __kmpc_serialized_parallel / __kmpc_end_serialized_parallel.
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  // With an if-clause both arms are emitted behind a branch; otherwise only
  // the fork path is generated.
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2179 
2180 // If we're inside an (outlined) parallel region, use the region info's
2181 // thread-ID variable (it is passed in a first argument of the outlined function
2182 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2183 // regular serial code region, get thread ID by calling kmp_int32
2184 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2185 // return the address of that temp.
2186 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2187                                              SourceLocation Loc) {
2188   if (auto *OMPRegionInfo =
2189           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2190     if (OMPRegionInfo->getThreadIDVariable())
2191       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2192 
2193   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2194   QualType Int32Ty =
2195       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2196   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2197   CGF.EmitStoreOfScalar(ThreadID,
2198                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2199 
2200   return ThreadIDTemp;
2201 }
2202 
2203 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
2204     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2205   SmallString<256> Buffer;
2206   llvm::raw_svector_ostream Out(Buffer);
2207   Out << Name;
2208   StringRef RuntimeName = Out.str();
2209   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2210   if (Elem.second) {
2211     assert(Elem.second->getType()->getPointerElementType() == Ty &&
2212            "OMP internal variable has different type than requested");
2213     return &*Elem.second;
2214   }
2215 
2216   return Elem.second = new llvm::GlobalVariable(
2217              CGM.getModule(), Ty, /*IsConstant*/ false,
2218              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2219              Elem.first(), /*InsertBefore=*/nullptr,
2220              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2221 }
2222 
2223 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2224   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2225   std::string Name = getName({Prefix, "var"});
2226   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2227 }
2228 
2229 namespace {
2230 /// Common pre(post)-action for different OpenMP constructs.
2231 class CommonActionTy final : public PrePostActionTy {
2232   llvm::FunctionCallee EnterCallee;
2233   ArrayRef<llvm::Value *> EnterArgs;
2234   llvm::FunctionCallee ExitCallee;
2235   ArrayRef<llvm::Value *> ExitArgs;
2236   bool Conditional;
2237   llvm::BasicBlock *ContBlock = nullptr;
2238 
2239 public:
2240   CommonActionTy(llvm::FunctionCallee EnterCallee,
2241                  ArrayRef<llvm::Value *> EnterArgs,
2242                  llvm::FunctionCallee ExitCallee,
2243                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2244       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2245         ExitArgs(ExitArgs), Conditional(Conditional) {}
2246   void Enter(CodeGenFunction &CGF) override {
2247     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2248     if (Conditional) {
2249       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2250       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2251       ContBlock = CGF.createBasicBlock("omp_if.end");
2252       // Generate the branch (If-stmt)
2253       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2254       CGF.EmitBlock(ThenBlock);
2255     }
2256   }
2257   void Done(CodeGenFunction &CGF) {
2258     // Emit the rest of blocks/branches
2259     CGF.EmitBranch(ContBlock);
2260     CGF.EmitBlock(ContBlock, true);
2261   }
2262   void Exit(CodeGenFunction &CGF) override {
2263     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2264   }
2265 };
2266 } // anonymous namespace
2267 
2268 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2269                                          StringRef CriticalName,
2270                                          const RegionCodeGenTy &CriticalOpGen,
2271                                          SourceLocation Loc, const Expr *Hint) {
2272   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2273   // CriticalOpGen();
2274   // __kmpc_end_critical(ident_t *, gtid, Lock);
2275   // Prepare arguments and build a call to __kmpc_critical
2276   if (!CGF.HaveInsertPoint())
2277     return;
2278   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2279                          getCriticalRegionLock(CriticalName)};
2280   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2281                                                 std::end(Args));
2282   if (Hint) {
2283     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2284         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2285   }
2286   CommonActionTy Action(
2287       llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
2288           CGM.getModule(),
2289           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2290       EnterArgs,
2291       llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
2292           CGM.getModule(), OMPRTL___kmpc_end_critical),
2293       Args);
2294   CriticalOpGen.setAction(Action);
2295   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2296 }
2297 
2298 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2299                                        const RegionCodeGenTy &MasterOpGen,
2300                                        SourceLocation Loc) {
2301   if (!CGF.HaveInsertPoint())
2302     return;
2303   // if(__kmpc_master(ident_t *, gtid)) {
2304   //   MasterOpGen();
2305   //   __kmpc_end_master(ident_t *, gtid);
2306   // }
2307   // Prepare arguments and build a call to __kmpc_master
2308   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2309   CommonActionTy Action(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
2310                             CGM.getModule(), OMPRTL___kmpc_master),
2311                         Args,
2312                         llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
2313                             CGM.getModule(), OMPRTL___kmpc_end_master),
2314                         Args,
2315                         /*Conditional=*/true);
2316   MasterOpGen.setAction(Action);
2317   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2318   Action.Done(CGF);
2319 }
2320 
2321 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2322                                         SourceLocation Loc) {
2323   if (!CGF.HaveInsertPoint())
2324     return;
2325   llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
2326   if (OMPBuilder) {
2327     OMPBuilder->CreateTaskyield(CGF.Builder);
2328   } else {
2329     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2330     llvm::Value *Args[] = {
2331         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2332         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2333     CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
2334                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2335                         Args);
2336   }
2337 
2338   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2339     Region->emitUntiedSwitch(CGF);
2340 }
2341 
2342 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2343                                           const RegionCodeGenTy &TaskgroupOpGen,
2344                                           SourceLocation Loc) {
2345   if (!CGF.HaveInsertPoint())
2346     return;
2347   // __kmpc_taskgroup(ident_t *, gtid);
2348   // TaskgroupOpGen();
2349   // __kmpc_end_taskgroup(ident_t *, gtid);
2350   // Prepare arguments and build a call to __kmpc_taskgroup
2351   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2352   CommonActionTy Action(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
2353                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2354                         Args,
2355                         llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
2356                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2357                         Args);
2358   TaskgroupOpGen.setAction(Action);
2359   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2360 }
2361 
2362 /// Given an array of pointers to variables, project the address of a
2363 /// given variable.
2364 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2365                                       unsigned Index, const VarDecl *Var) {
2366   // Pull out the pointer to the variable.
2367   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2368   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2369 
2370   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2371   Addr = CGF.Builder.CreateElementBitCast(
2372       Addr, CGF.ConvertTypeForMem(Var->getType()));
2373   return Addr;
2374 }
2375 
/// Emit the helper used by __kmpc_copyprivate: a function taking two opaque
/// pointers to arrays of void* (destination and source variable addresses)
/// that performs the per-variable copyprivate assignments from \p
/// AssignmentOps. Returns the newly created internal-linkage function.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  // Emit the body with a fresh CodeGenFunction so this helper's state does not
  // interfere with the caller's.
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    // Project the I-th destination and source addresses out of the two arrays
    // and run the corresponding assignment expression on them.
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
2429 
/// Emit a 'single' region, optionally broadcasting the listed copyprivate
/// variables from the executing thread to the rest of the team via
/// __kmpc_copyprivate.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // Overall shape of the emitted code:
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // did_it flags which thread executed the single region; all threads pass it
  // to __kmpc_copyprivate below.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1; (still inside the conditional, i.e. only on the thread that
    // actually executed the single region)
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  // Close the conditional opened by the __kmpc_single check.
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
2517 
2518 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2519                                         const RegionCodeGenTy &OrderedOpGen,
2520                                         SourceLocation Loc, bool IsThreads) {
2521   if (!CGF.HaveInsertPoint())
2522     return;
2523   // __kmpc_ordered(ident_t *, gtid);
2524   // OrderedOpGen();
2525   // __kmpc_end_ordered(ident_t *, gtid);
2526   // Prepare arguments and build a call to __kmpc_ordered
2527   if (IsThreads) {
2528     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2529     CommonActionTy Action(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
2530                               CGM.getModule(), OMPRTL___kmpc_ordered),
2531                           Args,
2532                           llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
2533                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2534                           Args);
2535     OrderedOpGen.setAction(Action);
2536     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2537     return;
2538   }
2539   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2540 }
2541 
2542 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2543   unsigned Flags;
2544   if (Kind == OMPD_for)
2545     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2546   else if (Kind == OMPD_sections)
2547     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2548   else if (Kind == OMPD_single)
2549     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2550   else if (Kind == OMPD_barrier)
2551     Flags = OMP_IDENT_BARRIER_EXPL;
2552   else
2553     Flags = OMP_IDENT_BARRIER_IMPL;
2554   return Flags;
2555 }
2556 
2557 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2558     CodeGenFunction &CGF, const OMPLoopDirective &S,
2559     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2560   // Check if the loop directive is actually a doacross loop directive. In this
2561   // case choose static, 1 schedule.
2562   if (llvm::any_of(
2563           S.getClausesOfKind<OMPOrderedClause>(),
2564           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2565     ScheduleKind = OMPC_SCHEDULE_static;
2566     // Chunk size is 1 in this case.
2567     llvm::APInt ChunkSize(32, 1);
2568     ChunkExpr = IntegerLiteral::Create(
2569         CGF.getContext(), ChunkSize,
2570         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2571         SourceLocation());
2572   }
2573 }
2574 
/// Emit a barrier. Uses the OpenMPIRBuilder when available; otherwise emits
/// __kmpc_cancel_barrier inside cancellable regions (optionally followed by
/// the cancellation-check branch) or a plain __kmpc_barrier.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
  if (OMPBuilder) {
    CGF.Builder.restoreIP(OMPBuilder->CreateBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // The directive kind determines the barrier flag encoded in the ident_t.
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id); both take the same two arguments.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      // Cancellable region: use the cancel-aware barrier entry point.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  // Non-cancellable (or forced-simple) case: plain barrier.
  CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}
2625 
2626 /// Map the OpenMP loop schedule to the runtime enumeration.
2627 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2628                                           bool Chunked, bool Ordered) {
2629   switch (ScheduleKind) {
2630   case OMPC_SCHEDULE_static:
2631     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2632                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2633   case OMPC_SCHEDULE_dynamic:
2634     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2635   case OMPC_SCHEDULE_guided:
2636     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2637   case OMPC_SCHEDULE_runtime:
2638     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2639   case OMPC_SCHEDULE_auto:
2640     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2641   case OMPC_SCHEDULE_unknown:
2642     assert(!Chunked && "chunk was specified but schedule kind not known");
2643     return Ordered ? OMP_ord_static : OMP_sch_static;
2644   }
2645   llvm_unreachable("Unexpected runtime schedule");
2646 }
2647 
2648 /// Map the OpenMP distribute schedule to the runtime enumeration.
2649 static OpenMPSchedType
2650 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2651   // only static is allowed for dist_schedule
2652   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2653 }
2654 
2655 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2656                                          bool Chunked) const {
2657   OpenMPSchedType Schedule =
2658       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2659   return Schedule == OMP_sch_static;
2660 }
2661 
2662 bool CGOpenMPRuntime::isStaticNonchunked(
2663     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2664   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2665   return Schedule == OMP_dist_sch_static;
2666 }
2667 
2668 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2669                                       bool Chunked) const {
2670   OpenMPSchedType Schedule =
2671       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2672   return Schedule == OMP_sch_static_chunked;
2673 }
2674 
2675 bool CGOpenMPRuntime::isStaticChunked(
2676     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2677   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2678   return Schedule == OMP_dist_sch_static_chunked;
2679 }
2680 
2681 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2682   OpenMPSchedType Schedule =
2683       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2684   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2685   return Schedule != OMP_sch_static;
2686 }
2687 
2688 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2689                                   OpenMPScheduleClauseModifier M1,
2690                                   OpenMPScheduleClauseModifier M2) {
2691   int Modifier = 0;
2692   switch (M1) {
2693   case OMPC_SCHEDULE_MODIFIER_monotonic:
2694     Modifier = OMP_sch_modifier_monotonic;
2695     break;
2696   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2697     Modifier = OMP_sch_modifier_nonmonotonic;
2698     break;
2699   case OMPC_SCHEDULE_MODIFIER_simd:
2700     if (Schedule == OMP_sch_static_chunked)
2701       Schedule = OMP_sch_static_balanced_chunked;
2702     break;
2703   case OMPC_SCHEDULE_MODIFIER_last:
2704   case OMPC_SCHEDULE_MODIFIER_unknown:
2705     break;
2706   }
2707   switch (M2) {
2708   case OMPC_SCHEDULE_MODIFIER_monotonic:
2709     Modifier = OMP_sch_modifier_monotonic;
2710     break;
2711   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2712     Modifier = OMP_sch_modifier_nonmonotonic;
2713     break;
2714   case OMPC_SCHEDULE_MODIFIER_simd:
2715     if (Schedule == OMP_sch_static_chunked)
2716       Schedule = OMP_sch_static_balanced_chunked;
2717     break;
2718   case OMPC_SCHEDULE_MODIFIER_last:
2719   case OMPC_SCHEDULE_MODIFIER_unknown:
2720     break;
2721   }
2722   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2723   // If the static schedule kind is specified or if the ordered clause is
2724   // specified, and if the nonmonotonic modifier is not specified, the effect is
2725   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2726   // modifier is specified, the effect is as if the nonmonotonic modifier is
2727   // specified.
2728   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2729     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2730           Schedule == OMP_sch_static_balanced_chunked ||
2731           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2732           Schedule == OMP_dist_sch_static_chunked ||
2733           Schedule == OMP_dist_sch_static))
2734       Modifier = OMP_sch_modifier_nonmonotonic;
2735   }
2736   return Schedule | Modifier;
2737 }
2738 
2739 void CGOpenMPRuntime::emitForDispatchInit(
2740     CodeGenFunction &CGF, SourceLocation Loc,
2741     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2742     bool Ordered, const DispatchRTInput &DispatchValues) {
2743   if (!CGF.HaveInsertPoint())
2744     return;
2745   OpenMPSchedType Schedule = getRuntimeSchedule(
2746       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2747   assert(Ordered ||
2748          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2749           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2750           Schedule != OMP_sch_static_balanced_chunked));
2751   // Call __kmpc_dispatch_init(
2752   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2753   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2754   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2755 
2756   // If the Chunk was not specified in the clause - use default value 1.
2757   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2758                                             : CGF.Builder.getIntN(IVSize, 1);
2759   llvm::Value *Args[] = {
2760       emitUpdateLocation(CGF, Loc),
2761       getThreadID(CGF, Loc),
2762       CGF.Builder.getInt32(addMonoNonMonoModifier(
2763           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2764       DispatchValues.LB,                                     // Lower
2765       DispatchValues.UB,                                     // Upper
2766       CGF.Builder.getIntN(IVSize, 1),                        // Stride
2767       Chunk                                                  // Chunk
2768   };
2769   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2770 }
2771 
2772 static void emitForStaticInitCall(
2773     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2774     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2775     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2776     const CGOpenMPRuntime::StaticRTInput &Values) {
2777   if (!CGF.HaveInsertPoint())
2778     return;
2779 
2780   assert(!Values.Ordered);
2781   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2782          Schedule == OMP_sch_static_balanced_chunked ||
2783          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2784          Schedule == OMP_dist_sch_static ||
2785          Schedule == OMP_dist_sch_static_chunked);
2786 
2787   // Call __kmpc_for_static_init(
2788   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2789   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2790   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2791   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2792   llvm::Value *Chunk = Values.Chunk;
2793   if (Chunk == nullptr) {
2794     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2795             Schedule == OMP_dist_sch_static) &&
2796            "expected static non-chunked schedule");
2797     // If the Chunk was not specified in the clause - use default value 1.
2798     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2799   } else {
2800     assert((Schedule == OMP_sch_static_chunked ||
2801             Schedule == OMP_sch_static_balanced_chunked ||
2802             Schedule == OMP_ord_static_chunked ||
2803             Schedule == OMP_dist_sch_static_chunked) &&
2804            "expected static chunked schedule");
2805   }
2806   llvm::Value *Args[] = {
2807       UpdateLocation,
2808       ThreadId,
2809       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2810                                                   M2)), // Schedule type
2811       Values.IL.getPointer(),                           // &isLastIter
2812       Values.LB.getPointer(),                           // &LB
2813       Values.UB.getPointer(),                           // &UB
2814       Values.ST.getPointer(),                           // &Stride
2815       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
2816       Chunk                                             // Chunk
2817   };
2818   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2819 }
2820 
2821 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2822                                         SourceLocation Loc,
2823                                         OpenMPDirectiveKind DKind,
2824                                         const OpenMPScheduleTy &ScheduleKind,
2825                                         const StaticRTInput &Values) {
2826   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2827       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2828   assert(isOpenMPWorksharingDirective(DKind) &&
2829          "Expected loop-based or sections-based directive.");
2830   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2831                                              isOpenMPLoopDirective(DKind)
2832                                                  ? OMP_IDENT_WORK_LOOP
2833                                                  : OMP_IDENT_WORK_SECTIONS);
2834   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2835   llvm::FunctionCallee StaticInitFunction =
2836       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2837   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2838   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2839                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2840 }
2841 
2842 void CGOpenMPRuntime::emitDistributeStaticInit(
2843     CodeGenFunction &CGF, SourceLocation Loc,
2844     OpenMPDistScheduleClauseKind SchedKind,
2845     const CGOpenMPRuntime::StaticRTInput &Values) {
2846   OpenMPSchedType ScheduleNum =
2847       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2848   llvm::Value *UpdatedLocation =
2849       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2850   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2851   llvm::FunctionCallee StaticInitFunction =
2852       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2853   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2854                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2855                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2856 }
2857 
2858 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2859                                           SourceLocation Loc,
2860                                           OpenMPDirectiveKind DKind) {
2861   if (!CGF.HaveInsertPoint())
2862     return;
2863   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2864   llvm::Value *Args[] = {
2865       emitUpdateLocation(CGF, Loc,
2866                          isOpenMPDistributeDirective(DKind)
2867                              ? OMP_IDENT_WORK_DISTRIBUTE
2868                              : isOpenMPLoopDirective(DKind)
2869                                    ? OMP_IDENT_WORK_LOOP
2870                                    : OMP_IDENT_WORK_SECTIONS),
2871       getThreadID(CGF, Loc)};
2872   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2873   CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
2874                           CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2875                       Args);
2876 }
2877 
2878 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2879                                                  SourceLocation Loc,
2880                                                  unsigned IVSize,
2881                                                  bool IVSigned) {
2882   if (!CGF.HaveInsertPoint())
2883     return;
2884   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2885   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2886   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2887 }
2888 
2889 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2890                                           SourceLocation Loc, unsigned IVSize,
2891                                           bool IVSigned, Address IL,
2892                                           Address LB, Address UB,
2893                                           Address ST) {
2894   // Call __kmpc_dispatch_next(
2895   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2896   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2897   //          kmp_int[32|64] *p_stride);
2898   llvm::Value *Args[] = {
2899       emitUpdateLocation(CGF, Loc),
2900       getThreadID(CGF, Loc),
2901       IL.getPointer(), // &isLastIter
2902       LB.getPointer(), // &Lower
2903       UB.getPointer(), // &Upper
2904       ST.getPointer()  // &Stride
2905   };
2906   llvm::Value *Call =
2907       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2908   return CGF.EmitScalarConversion(
2909       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2910       CGF.getContext().BoolTy, Loc);
2911 }
2912 
2913 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2914                                            llvm::Value *NumThreads,
2915                                            SourceLocation Loc) {
2916   if (!CGF.HaveInsertPoint())
2917     return;
2918   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2919   llvm::Value *Args[] = {
2920       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2921       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2922   CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
2923                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2924                       Args);
2925 }
2926 
2927 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2928                                          ProcBindKind ProcBind,
2929                                          SourceLocation Loc) {
2930   if (!CGF.HaveInsertPoint())
2931     return;
2932   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2933   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2934   llvm::Value *Args[] = {
2935       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2936       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2937   CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
2938                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2939                       Args);
2940 }
2941 
2942 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2943                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2944   llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
2945   if (OMPBuilder) {
2946     OMPBuilder->CreateFlush(CGF.Builder);
2947   } else {
2948     if (!CGF.HaveInsertPoint())
2949       return;
2950     // Build call void __kmpc_flush(ident_t *loc)
2951     CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
2952                             CGM.getModule(), OMPRTL___kmpc_flush),
2953                         emitUpdateLocation(CGF, Loc));
2954   }
2955 }
2956 
namespace {
/// Indexes of fields for type kmp_task_t.
/// NOTE(review): these enumerators are used as field indices, so their order
/// presumably must stay in sync with the kmp_task_t record layout built
/// elsewhere in this file — confirm before reordering.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2982 
2983 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2984   return OffloadEntriesTargetRegion.empty() &&
2985          OffloadEntriesDeviceGlobalVar.empty();
2986 }
2987 
2988 /// Initialize target region entry.
2989 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2990     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2991                                     StringRef ParentName, unsigned LineNum,
2992                                     unsigned Order) {
2993   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
2994                                              "only required for the device "
2995                                              "code generation.");
2996   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
2997       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
2998                                    OMPTargetRegionEntryTargetRegion);
2999   ++OffloadingEntriesNum;
3000 }
3001 
3002 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3003     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3004                                   StringRef ParentName, unsigned LineNum,
3005                                   llvm::Constant *Addr, llvm::Constant *ID,
3006                                   OMPTargetRegionEntryKind Flags) {
3007   // If we are emitting code for a target, the entry is already initialized,
3008   // only has to be registered.
3009   if (CGM.getLangOpts().OpenMPIsDevice) {
3010     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
3011       unsigned DiagID = CGM.getDiags().getCustomDiagID(
3012           DiagnosticsEngine::Error,
3013           "Unable to find target region on line '%0' in the device code.");
3014       CGM.getDiags().Report(DiagID) << LineNum;
3015       return;
3016     }
3017     auto &Entry =
3018         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3019     assert(Entry.isValid() && "Entry not initialized!");
3020     Entry.setAddress(Addr);
3021     Entry.setID(ID);
3022     Entry.setFlags(Flags);
3023   } else {
3024     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3025     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3026     ++OffloadingEntriesNum;
3027   }
3028 }
3029 
3030 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3031     unsigned DeviceID, unsigned FileID, StringRef ParentName,
3032     unsigned LineNum) const {
3033   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3034   if (PerDevice == OffloadEntriesTargetRegion.end())
3035     return false;
3036   auto PerFile = PerDevice->second.find(FileID);
3037   if (PerFile == PerDevice->second.end())
3038     return false;
3039   auto PerParentName = PerFile->second.find(ParentName);
3040   if (PerParentName == PerFile->second.end())
3041     return false;
3042   auto PerLine = PerParentName->second.find(LineNum);
3043   if (PerLine == PerParentName->second.end())
3044     return false;
3045   // Fail if this entry is already registered.
3046   if (PerLine->second.getAddress() || PerLine->second.getID())
3047     return false;
3048   return true;
3049 }
3050 
3051 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3052     const OffloadTargetRegionEntryInfoActTy &Action) {
3053   // Scan all target region entries and perform the provided action.
3054   for (const auto &D : OffloadEntriesTargetRegion)
3055     for (const auto &F : D.second)
3056       for (const auto &P : F.second)
3057         for (const auto &L : P.second)
3058           Action(D.first, F.first, P.first(), L.first, L.second);
3059 }
3060 
/// Create a placeholder entry for a declare-target global variable discovered
/// in the host IR metadata; address, size and linkage are provided later by
/// registerDeviceGlobalVarEntryInfo.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  // try_emplace: an already-initialized name is left untouched.
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}
3071 
/// Record address/size/linkage for a declare-target global variable entry.
/// On the device the entry must have been pre-initialized from the host IR
/// metadata; on the host it is created on first registration.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // NOTE(review): operator[] default-constructs an entry if VarName was
    // never initialized; the assert below relies on such an entry being
    // invalid.
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    assert(Entry.isValid() && Entry.getFlags() == Flags &&
           "Entry not initialized!");
    assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
           "Resetting with the new address.");
    // Already registered: at most fill in a previously unknown size/linkage.
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    // Host: a second registration may only refine the size/linkage of an
    // existing entry, never change its address.
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
             "Resetting with the new address.");
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}
3111 
3112 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3113     actOnDeviceGlobalVarEntriesInfo(
3114         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3115   // Scan all target region entries and perform the provided action.
3116   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3117     Action(E.getKey(), E.getValue());
3118 }
3119 
3120 void CGOpenMPRuntime::createOffloadEntry(
3121     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3122     llvm::GlobalValue::LinkageTypes Linkage) {
3123   StringRef Name = Addr->getName();
3124   llvm::Module &M = CGM.getModule();
3125   llvm::LLVMContext &C = M.getContext();
3126 
3127   // Create constant string with the name.
3128   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3129 
3130   std::string StringName = getName({"omp_offloading", "entry_name"});
3131   auto *Str = new llvm::GlobalVariable(
3132       M, StrPtrInit->getType(), /*isConstant=*/true,
3133       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3134   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3135 
3136   llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
3137                             llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
3138                             llvm::ConstantInt::get(CGM.SizeTy, Size),
3139                             llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3140                             llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3141   std::string EntryName = getName({"omp_offloading", "entry", ""});
3142   llvm::GlobalVariable *Entry = createGlobalStruct(
3143       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3144       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3145 
3146   // The entry has to be created in the section the linker expects it to be.
3147   Entry->setSection("omp_offloading_entries");
3148 }
3149 
/// Emit the __tgt_offload_entry descriptors for all registered offloading
/// entries and the !omp_offload.info metadata that lets the device-side
/// compilation discover them.
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for functions that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries ordered by their creation index: each slot holds the entry, the
  // source location it was found at (target regions only), and the mangled
  // parent-function/variable name.
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry.
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Recover a SourceLocation for diagnostics by searching the source
        // manager for the file matching this (DeviceID, FileID) pair.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry.
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Now that every entry sits at its creation-order slot, emit the actual
  // __tgt_offload_entry descriptors (or diagnose incomplete entries).
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        // Under unified shared memory on the device no entry is needed.
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        // 'link' entries are only emitted on the host side.
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
3323 
/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host IR bitcode into a throwaway context; only the
  // !omp_offload.info metadata is consumed.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Helpers decoding one operand of the metadata node as an integer or a
    // string; the operand layout matches the emitter in
    // createOffloadEntriesAndInfoMetadata().
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 is the entry kind; the remaining operands depend on it.
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
3392 
3393 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3394   if (!KmpRoutineEntryPtrTy) {
3395     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3396     ASTContext &C = CGM.getContext();
3397     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3398     FunctionProtoType::ExtProtoInfo EPI;
3399     KmpRoutineEntryPtrQTy = C.getPointerType(
3400         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3401     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3402   }
3403 }
3404 
3405 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3406   // Make sure the type of the entry is already created. This is the type we
3407   // have to create:
3408   // struct __tgt_offload_entry{
3409   //   void      *addr;       // Pointer to the offload entry info.
3410   //                          // (function or global)
3411   //   char      *name;       // Name of the function or global.
3412   //   size_t     size;       // Size of the entry info (0 if it a function).
3413   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3414   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3415   // };
3416   if (TgtOffloadEntryQTy.isNull()) {
3417     ASTContext &C = CGM.getContext();
3418     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3419     RD->startDefinition();
3420     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3421     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3422     addFieldToRecordDecl(C, RD, C.getSizeType());
3423     addFieldToRecordDecl(
3424         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3425     addFieldToRecordDecl(
3426         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3427     RD->completeDefinition();
3428     RD->addAttr(PackedAttr::CreateImplicit(C));
3429     TgtOffloadEntryQTy = C.getRecordType(RD);
3430   }
3431   return TgtOffloadEntryQTy;
3432 }
3433 
3434 namespace {
3435 struct PrivateHelpersTy {
3436   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3437                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3438       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3439         PrivateElemInit(PrivateElemInit) {}
3440   const Expr *OriginalRef = nullptr;
3441   const VarDecl *Original = nullptr;
3442   const VarDecl *PrivateCopy = nullptr;
3443   const VarDecl *PrivateElemInit = nullptr;
3444 };
3445 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3446 } // anonymous namespace
3447 
3448 static RecordDecl *
3449 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3450   if (!Privates.empty()) {
3451     ASTContext &C = CGM.getContext();
3452     // Build struct .kmp_privates_t. {
3453     //         /*  private vars  */
3454     //       };
3455     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3456     RD->startDefinition();
3457     for (const auto &Pair : Privates) {
3458       const VarDecl *VD = Pair.second.Original;
3459       QualType Type = VD->getType().getNonReferenceType();
3460       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3461       if (VD->hasAttrs()) {
3462         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3463              E(VD->getAttrs().end());
3464              I != E; ++I)
3465           FD->addAttr(*I);
3466       }
3467     }
3468     RD->completeDefinition();
3469     return RD;
3470   }
3471   return nullptr;
3472 }
3473 
3474 static RecordDecl *
3475 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3476                          QualType KmpInt32Ty,
3477                          QualType KmpRoutineEntryPointerQTy) {
3478   ASTContext &C = CGM.getContext();
3479   // Build struct kmp_task_t {
3480   //         void *              shareds;
3481   //         kmp_routine_entry_t routine;
3482   //         kmp_int32           part_id;
3483   //         kmp_cmplrdata_t data1;
3484   //         kmp_cmplrdata_t data2;
3485   // For taskloops additional fields:
3486   //         kmp_uint64          lb;
3487   //         kmp_uint64          ub;
3488   //         kmp_int64           st;
3489   //         kmp_int32           liter;
3490   //         void *              reductions;
3491   //       };
3492   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3493   UD->startDefinition();
3494   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3495   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3496   UD->completeDefinition();
3497   QualType KmpCmplrdataTy = C.getRecordType(UD);
3498   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3499   RD->startDefinition();
3500   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3501   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3502   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3503   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3504   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3505   if (isOpenMPTaskLoopDirective(Kind)) {
3506     QualType KmpUInt64Ty =
3507         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3508     QualType KmpInt64Ty =
3509         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3510     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3511     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3512     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3513     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3514     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3515   }
3516   RD->completeDefinition();
3517   return RD;
3518 }
3519 
3520 static RecordDecl *
3521 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3522                                      ArrayRef<PrivateDataTy> Privates) {
3523   ASTContext &C = CGM.getContext();
3524   // Build struct kmp_task_t_with_privates {
3525   //         kmp_task_t task_data;
3526   //         .kmp_privates_t. privates;
3527   //       };
3528   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3529   RD->startDefinition();
3530   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3531   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3532     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3533   RD->completeDefinition();
3534   return RD;
3535 }
3536 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Arguments of the generated entry:
  //   (kmp_int32 gtid, kmp_task_t_with_privates *restrict tt).
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  // Internal-linkage function named ".omp_task_entry.".
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // Load the task descriptor pointer and form an lvalue for the embedded
  // kmp_task_t, which is the first field of kmp_task_t_with_privates.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address, not by value, so the callee can update it.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  // Load the shareds pointer stored in the task and cast it to the pointer
  // type the outlined task function expects.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // &tt->privates if the wrapper record has a privates field, otherwise a
  // null void* is passed.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloop entries additionally receive lb, ub, st, liter and the
    // reductions pointer, all loaded from the kmp_task_t fields.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The entry always returns 0 to the runtime.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
3651 
/// Emit a function with the kmp_routine_entry_t signature (kmp_int32 gtid,
/// kmp_task_t_with_privates *tt) that runs the destructor for every field of
/// the task privates record whose type has a non-trivial destruction kind.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Arguments: (kmp_int32 gtid, kmp_task_t_with_privates *restrict tt).
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  // Internal-linkage function named ".omp_task_destructor.".
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  // Load the task descriptor and address the privates record, i.e. the
  // second field of kmp_task_t_with_privates.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  // Push a destroy cleanup for every private field that needs destruction;
  // FinishFunction emits the accumulated cleanups.
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
3700 
3701 /// Emit a privates mapping function for correct handling of private and
3702 /// firstprivate variables.
3703 /// \code
3704 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3705 /// **noalias priv1,...,  <tyn> **noalias privn) {
3706 ///   *priv1 = &.privates.priv1;
3707 ///   ...;
3708 ///   *privn = &.privates.privn;
3709 /// }
3710 /// \endcode
3711 static llvm::Value *
3712 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3713                                ArrayRef<const Expr *> PrivateVars,
3714                                ArrayRef<const Expr *> FirstprivateVars,
3715                                ArrayRef<const Expr *> LastprivateVars,
3716                                QualType PrivatesQTy,
3717                                ArrayRef<PrivateDataTy> Privates) {
3718   ASTContext &C = CGM.getContext();
3719   FunctionArgList Args;
3720   ImplicitParamDecl TaskPrivatesArg(
3721       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3722       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3723       ImplicitParamDecl::Other);
3724   Args.push_back(&TaskPrivatesArg);
3725   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
3726   unsigned Counter = 1;
3727   for (const Expr *E : PrivateVars) {
3728     Args.push_back(ImplicitParamDecl::Create(
3729         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3730         C.getPointerType(C.getPointerType(E->getType()))
3731             .withConst()
3732             .withRestrict(),
3733         ImplicitParamDecl::Other));
3734     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3735     PrivateVarsPos[VD] = Counter;
3736     ++Counter;
3737   }
3738   for (const Expr *E : FirstprivateVars) {
3739     Args.push_back(ImplicitParamDecl::Create(
3740         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3741         C.getPointerType(C.getPointerType(E->getType()))
3742             .withConst()
3743             .withRestrict(),
3744         ImplicitParamDecl::Other));
3745     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3746     PrivateVarsPos[VD] = Counter;
3747     ++Counter;
3748   }
3749   for (const Expr *E : LastprivateVars) {
3750     Args.push_back(ImplicitParamDecl::Create(
3751         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3752         C.getPointerType(C.getPointerType(E->getType()))
3753             .withConst()
3754             .withRestrict(),
3755         ImplicitParamDecl::Other));
3756     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3757     PrivateVarsPos[VD] = Counter;
3758     ++Counter;
3759   }
3760   const auto &TaskPrivatesMapFnInfo =
3761       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3762   llvm::FunctionType *TaskPrivatesMapTy =
3763       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3764   std::string Name =
3765       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3766   auto *TaskPrivatesMap = llvm::Function::Create(
3767       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3768       &CGM.getModule());
3769   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3770                                     TaskPrivatesMapFnInfo);
3771   if (CGM.getLangOpts().Optimize) {
3772     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3773     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3774     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3775   }
3776   CodeGenFunction CGF(CGM);
3777   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3778                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3779 
3780   // *privi = &.privates.privi;
3781   LValue Base = CGF.EmitLoadOfPointerLValue(
3782       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3783       TaskPrivatesArg.getType()->castAs<PointerType>());
3784   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3785   Counter = 0;
3786   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3787     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3788     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3789     LValue RefLVal =
3790         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3791     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3792         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3793     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3794     ++Counter;
3795   }
3796   CGF.FinishFunction();
3797   return TaskPrivatesMap;
3798 }
3799 
/// Emit initialization for private variables in task-based directives.
/// \param KmpTaskSharedsPtr Address of the captured shareds block that
///        firstprivate originals are read from (may be invalid).
/// \param TDBase LValue of the kmp_task_t_with_privates instance whose
///        privates field is being initialized.
/// \param ForDup true when emitting the body of the taskloop task-dup
///        function, false when initializing a newly allocated task.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates record is the second field of kmp_task_t_with_privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Walk the fields of the privates record in lockstep with Privates; the
  // record was built from the same list, so the orders match.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the dup function only non-trivial constructor initializations need
    // to be (re-)emitted; trivial ones were handled at task allocation.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        // Firstprivate: copy/construct from the shared original.
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read the original out of the source task's shareds block.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
          // Variable captured by a lambda or a block: emit through the
          // original reference expression directly.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Scalar/class firstprivate: bind the init helper to the shared
          // address and emit the initializer into the private field.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private: emit the default initializer, if any.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3916 
3917 /// Check if duplication function is required for taskloops.
3918 static bool checkInitIsRequired(CodeGenFunction &CGF,
3919                                 ArrayRef<PrivateDataTy> Privates) {
3920   bool InitRequired = false;
3921   for (const PrivateDataTy &Pair : Privates) {
3922     const VarDecl *VD = Pair.second.PrivateCopy;
3923     const Expr *Init = VD->getAnyInitializer();
3924     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3925                                     !CGF.isTrivialInitializer(Init));
3926     if (InitRequired)
3927       break;
3928   }
3929   return InitRequired;
3930 }
3931 
3932 
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Arguments: (kmp_task_t_with_privates *task_dst,
  //             kmp_task_t_with_privates *task_src, int lastpriv).
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  // Internal-linkage function named ".omp_task_dup.".
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  // Base lvalue for the *destination* task descriptor.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    // Deliberately shadows the outer TDBase: firstprivate originals are read
    // from the *source* task's shareds block.
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGM.getNaturalTypeAlignment(SharedsTy));
  }
  // Initialize the destination task's privates from the source shareds.
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
4011 
4012 /// Checks if destructor function is required to be generated.
4013 /// \return true if cleanups are required, false otherwise.
4014 static bool
4015 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
4016   bool NeedsCleanup = false;
4017   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4018   const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
4019   for (const FieldDecl *FD : PrivateRD->fields()) {
4020     NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
4021     if (NeedsCleanup)
4022       break;
4023   }
4024   return NeedsCleanup;
4025 }
4026 
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// RAII scope: the constructor privatizes every iterator variable and its
/// helper counter and emits the loop headers (counter = 0, bound check,
/// iterator update) for all iterators in order; the destructor emits the
/// matching latches and exit blocks in reverse order. Code emitted between
/// construction and destruction therefore becomes the innermost loop body.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  // Iterator expression being expanded; nullptr makes the scope a no-op.
  const OMPIteratorExpr *E = nullptr;
  // Per-iterator continuation (loop header) and exit destinations, filled in
  // construction order and consumed in reverse order by the destructor.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    // Evaluate all upper bounds up front, before the counters are privatized.
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    // Emit the nest of loop headers, outermost first.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Use a signed or unsigned comparison matching the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  // Close the loop nest, innermost loop first.
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
4105 
4106 static std::pair<llvm::Value *, llvm::Value *>
4107 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4108   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4109   llvm::Value *Addr;
4110   if (OASE) {
4111     const Expr *Base = OASE->getBase();
4112     Addr = CGF.EmitScalarExpr(Base);
4113   } else {
4114     Addr = CGF.EmitLValue(E).getPointer(CGF);
4115   }
4116   llvm::Value *SizeVal;
4117   QualType Ty = E->getType();
4118   if (OASE) {
4119     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4120     for (const Expr *SE : OASE->getDimensions()) {
4121       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4122       Sz = CGF.EmitScalarConversion(
4123           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4124       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4125     }
4126   } else if (const auto *ASE =
4127                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4128     LValue UpAddrLVal =
4129         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4130     llvm::Value *UpAddr =
4131         CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
4132     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4133     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4134     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4135   } else {
4136     SizeVal = CGF.getTypeSize(Ty);
4137   }
4138   return std::make_pair(Addr, SizeVal);
4139 }
4140 
4141 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4142 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4143   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4144   if (KmpTaskAffinityInfoTy.isNull()) {
4145     RecordDecl *KmpAffinityInfoRD =
4146         C.buildImplicitRecord("kmp_task_affinity_info_t");
4147     KmpAffinityInfoRD->startDefinition();
4148     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4149     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4150     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4151     KmpAffinityInfoRD->completeDefinition();
4152     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4153   }
4154 }
4155 
/// Allocates and initializes a kmp_task_t object for the task-generating
/// directive \p D and returns the pieces needed to emit the actual launch.
///
/// Emits, in order: the privates record tailored to the captured
/// private/firstprivate/lastprivate variables, the proxy task-entry function,
/// the __kmpc_omp_task_alloc (or, with 'nowait' on a target directive,
/// __kmpc_omp_target_task_alloc) call, initialization of the shareds copy and
/// private copies, and the optional detach event, affinity registration,
/// destructors thunk and priority field.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally carry the init expression used to copy the
  // original value into the private copy.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Sort by decreasing alignment so the privates record gets minimal padding;
  // stable sort keeps the clause order for entries with equal alignment.
  llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
    return L.first > R.first;
  });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloop directives use a
  // larger record (with loop bounds), cached separately from the plain one.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates-map function is the fourth parameter of the task entry.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    // No privates: pass a null map.
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    // Set DestructorsFlag only if some private copy needs destruction.
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // 'final' may be a runtime condition (select) or a compile-time constant.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // Clauses with an iterator modifier contribute a runtime-computed count
    // (product of all iterator upper bounds); the rest is a static count.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime-sized count: emit a VLA of affinity records.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      OpaqueValueExpr OVE(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_RValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Compile-time count: use a constant array.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      // The iterator-expanded elements need a runtime position counter,
      // starting after the statically filled slots.
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops that need per-chunk (re)initialization also get a task-dup
    // function used by the runtime when splitting the task.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4538 
namespace {
/// Dependence kind for RTL.
/// NOTE(review): these values appear to mirror the bit encoding of the
/// kmp_depend_info flags in the OpenMP runtime — confirm against
/// openmp/runtime/src/kmp.h before changing them.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Fields ids in kmp_depend_info record.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace
4549 
4550 /// Translates internal dependency kind into the runtime kind.
4551 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4552   RTLDependenceKindTy DepKind;
4553   switch (K) {
4554   case OMPC_DEPEND_in:
4555     DepKind = DepIn;
4556     break;
4557   // Out and InOut dependencies must use the same code.
4558   case OMPC_DEPEND_out:
4559   case OMPC_DEPEND_inout:
4560     DepKind = DepInOut;
4561     break;
4562   case OMPC_DEPEND_mutexinoutset:
4563     DepKind = DepMutexInOutSet;
4564     break;
4565   case OMPC_DEPEND_source:
4566   case OMPC_DEPEND_sink:
4567   case OMPC_DEPEND_depobj:
4568   case OMPC_DEPEND_unknown:
4569     llvm_unreachable("Unknown task dependence type");
4570   }
4571   return DepKind;
4572 }
4573 
4574 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4575 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4576                            QualType &FlagsTy) {
4577   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4578   if (KmpDependInfoTy.isNull()) {
4579     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4580     KmpDependInfoRD->startDefinition();
4581     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4582     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4583     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4584     KmpDependInfoRD->completeDefinition();
4585     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4586   }
4587 }
4588 
/// Returns the number of dependencies stored in a depobj and an lvalue for
/// the first element of its kmp_depend_info array.
///
/// The depobj variable holds a void* pointing at the first kmp_depend_info
/// element; the element immediately before it (index -1) acts as a header
/// whose base_addr field stores the number of dependencies.
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  // Load the void* stored in the depobj variable.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  // Reinterpret it as a pointer to kmp_depend_info elements.
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // Step back one element to reach the header that holds the count.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
      Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
4617 
/// Fills elements of \p DependenciesArray for a single dependence list:
/// for every expression in \p Data.DepExprs stores {base_addr, len, flags}.
///
/// \param Pos Either a compile-time slot index (unsigned*), incremented per
/// emitted element, or a runtime position lvalue (LValue*) that is loaded,
/// used to index the array and stored back incremented — needed when the
/// element count is not a compile-time constant (e.g. iterator modifiers).
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // If the clause has an iterator modifier, wrap the stores in the generated
  // iterator loop nest; with a null expression the scope is a no-op.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      // Static position: index with a constant GEP.
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      // Runtime position: load the counter and index dynamically.
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
                  DependenciesArray.getAlignment()),
          KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the position (static bump or load/add/store for runtime).
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
4676 
/// Emits code that loads, for each depobj dependency in \p Data, the number
/// of kmp_depend_info records stored in that depobj, and returns the list of
/// loaded count values. Each depobj stores its element count in the
/// 'base_addr' field of the array slot placed just before the first real
/// element (see emitDepobjDependClause); this routine reads that counter.
/// For iterator-driven lists the counts are accumulated inside the iterator
/// loop into zero-initialized temporaries and re-loaded after the iterator
/// scope is closed.
static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  {
    // Emit the iterator loops (if any) surrounding the per-expression code.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      // Load the depobj handle and reinterpret it as kmp_depend_info *.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      // Step back one element: the element count is kept in the slot just
      // before the depobj's payload.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      // Accumulate into a zero-initialized temporary (InitTempAlloca stores
      // the 0 at the alloca point, outside any iterator loop) so the value
      // sums over all iterator iterations and survives the scope below.
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.InitTempAlloca(NumLVal.getAddress(CGF),
                         llvm::ConstantInt::get(CGF.IntPtrTy, 0));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // Re-load the accumulated counts outside the iterator scope.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}
4734 
/// Emits code that copies the kmp_depend_info records stored in each depobj
/// dependency of \p Data into \p DependenciesArray, starting at the running
/// index kept in \p PosLVal. The index is advanced by the number of copied
/// records after each depobj.
static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  // Size in bytes of one kmp_depend_info record.
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    // Emit the iterator loops (if any) surrounding the per-expression code.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      // Load the depobj handle and reinterpret it as kmp_depend_info *.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj.
      // The count is kept in the 'base_addr' field of the slot just before
      // the depobj's payload.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // memcopy dependency data.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr =
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
                  DependenciesArray.getAlignment());
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += numDeps; (advance by the number of copied records, not bytes)
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}
4795 
4796 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4797     CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4798     SourceLocation Loc) {
4799   if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4800         return D.DepExprs.empty();
4801       }))
4802     return std::make_pair(nullptr, Address::invalid());
4803   // Process list of dependencies.
4804   ASTContext &C = CGM.getContext();
4805   Address DependenciesArray = Address::invalid();
4806   llvm::Value *NumOfElements = nullptr;
4807   unsigned NumDependencies = std::accumulate(
4808       Dependencies.begin(), Dependencies.end(), 0,
4809       [](unsigned V, const OMPTaskDataTy::DependData &D) {
4810         return D.DepKind == OMPC_DEPEND_depobj
4811                    ? V
4812                    : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4813       });
4814   QualType FlagsTy;
4815   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4816   bool HasDepobjDeps = false;
4817   bool HasRegularWithIterators = false;
4818   llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4819   llvm::Value *NumOfRegularWithIterators =
4820       llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4821   // Calculate number of depobj dependecies and regular deps with the iterators.
4822   for (const OMPTaskDataTy::DependData &D : Dependencies) {
4823     if (D.DepKind == OMPC_DEPEND_depobj) {
4824       SmallVector<llvm::Value *, 4> Sizes =
4825           emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4826       for (llvm::Value *Size : Sizes) {
4827         NumOfDepobjElements =
4828             CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4829       }
4830       HasDepobjDeps = true;
4831       continue;
4832     }
4833     // Include number of iterations, if any.
4834     if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4835       for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4836         llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4837         Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4838         NumOfRegularWithIterators =
4839             CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
4840       }
4841       HasRegularWithIterators = true;
4842       continue;
4843     }
4844   }
4845 
4846   QualType KmpDependInfoArrayTy;
4847   if (HasDepobjDeps || HasRegularWithIterators) {
4848     NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4849                                            /*isSigned=*/false);
4850     if (HasDepobjDeps) {
4851       NumOfElements =
4852           CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4853     }
4854     if (HasRegularWithIterators) {
4855       NumOfElements =
4856           CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4857     }
4858     OpaqueValueExpr OVE(Loc,
4859                         C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4860                         VK_RValue);
4861     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
4862                                                   RValue::get(NumOfElements));
4863     KmpDependInfoArrayTy =
4864         C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
4865                                /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4866     // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4867     // Properly emit variable-sized array.
4868     auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4869                                          ImplicitParamDecl::Other);
4870     CGF.EmitVarDecl(*PD);
4871     DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4872     NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4873                                               /*isSigned=*/false);
4874   } else {
4875     KmpDependInfoArrayTy = C.getConstantArrayType(
4876         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4877         ArrayType::Normal, /*IndexTypeQuals=*/0);
4878     DependenciesArray =
4879         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4880     DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4881     NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4882                                            /*isSigned=*/false);
4883   }
4884   unsigned Pos = 0;
4885   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4886     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4887         Dependencies[I].IteratorExpr)
4888       continue;
4889     emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4890                    DependenciesArray);
4891   }
4892   // Copy regular dependecies with iterators.
4893   LValue PosLVal = CGF.MakeAddrLValue(
4894       CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4895   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4896   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4897     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4898         !Dependencies[I].IteratorExpr)
4899       continue;
4900     emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4901                    DependenciesArray);
4902   }
4903   // Copy final depobj arrays without iterators.
4904   if (HasDepobjDeps) {
4905     for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4906       if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4907         continue;
4908       emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4909                          DependenciesArray);
4910     }
4911   }
4912   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4913       DependenciesArray, CGF.VoidPtrTy);
4914   return std::make_pair(NumOfElements, DependenciesArray);
4915 }
4916 
/// Emits the initialization of a depobj: allocates a kmp_depend_info array on
/// the heap via __kmpc_alloc, stores the element count in the extra leading
/// slot, fills in the dependence records, and returns the address of the
/// first real record (one element past the counter slot).
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // Runtime count: product of the iterator upper bounds.
    // NOTE(review): this does not multiply by DepExprs.size(); presumably it
    // relies on the depobj depend clause allowing a single locator — confirm
    // against the OpenMP spec restriction.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    // +1 for the leading counter slot.
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Static count: size of kmp_depend_info[NumDependencies + 1] (extra slot
    // for the counter), rounded up to the record alignment.
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address(Addr, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // Position 1 skips the counter slot. With iterators the position must be a
  // runtime value (it advances inside the generated loops); otherwise a
  // compile-time index suffices.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return the address of the first real record, past the counter slot.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
  return DependenciesArray;
}
4999 
5000 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
5001                                         SourceLocation Loc) {
5002   ASTContext &C = CGM.getContext();
5003   QualType FlagsTy;
5004   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5005   LValue Base = CGF.EmitLoadOfPointerLValue(
5006       DepobjLVal.getAddress(CGF),
5007       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5008   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5009   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5010       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
5011   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5012       Addr.getPointer(),
5013       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5014   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
5015                                                                CGF.VoidPtrTy);
5016   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5017   // Use default allocator.
5018   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5019   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5020 
5021   // _kmpc_free(gtid, addr, nullptr);
5022   (void)CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
5023                                 CGM.getModule(), OMPRTL___kmpc_free),
5024                             Args);
5025 }
5026 
/// Rewrites the dependence kind stored in every record of a depobj's array:
/// implements '#pragma omp depobj(x) update(<kind>)'.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  // Fetch the (runtime) record count and the address of the first record.
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  // NOTE(review): the body runs once before the exit test, so this presumably
  // relies on every depobj holding at least one record — confirm.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // PHI carries the current element pointer across loop iterations.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Address(ElementPHI, Begin.getAlignment());
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  // Loop back until the advanced pointer reaches the end.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5072 
/// Emits code for the 'task' directive: allocates the task via emitTaskInit,
/// materializes the dependence array (if any), and dispatches either
/// __kmpc_omp_task[_with_deps] (then-branch) or the serialized
/// begin_if0/proxy-call/complete_if0 sequence (else-branch), guarded by the
/// 'if' clause condition when present.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // Then-branch: enqueue the task asynchronously.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied tasks start at part id 0.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  // Else-branch ('if' clause false): wait on dependences, then run the task
  // body inline between begin_if0/complete_if0.
  auto &&ElseCodeGen = [&M, &TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
                        &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_wait_deps),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  // Without an 'if' clause, only the asynchronous path is emitted.
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
5190 
/// Emits code for the 'taskloop' directive: allocates the task, fills in the
/// loop bounds/stride and reductions fields of the task record, and emits the
/// __kmpc_taskloop runtime call.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // Unlike 'task', the 'if' clause is passed to the runtime as an integer
  // argument rather than generating two code paths.
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the lower bound field of the task record from the loop's
  // lower-bound helper variable.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Same for the upper bound field.
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // And for the stride field.
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // Values of the 'sched' argument of __kmpc_taskloop.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      // Data.Schedule packs the grainsize/num_tasks expression (pointer) and
      // which of the two clauses it came from (int).
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}
5276 
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr, EExpr, UpExpr Optional expressions forwarded verbatim to
/// \p RedOpGen (used by atomic/UDR-style combiners).
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely for a zero-length array.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs carry the current source/destination element pointers across
  // iterations.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Privatize LHSVar/RHSVar so that RedOpGen's references to them resolve to
  // the current array elements.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // The back-edges must come from the block current after RedOpGen, which may
  // have emitted control flow of its own.
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5356 
5357 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5358 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5359 /// UDR combiner function.
5360 static void emitReductionCombiner(CodeGenFunction &CGF,
5361                                   const Expr *ReductionOp) {
5362   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5363     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5364       if (const auto *DRE =
5365               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5366         if (const auto *DRD =
5367                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5368           std::pair<llvm::Function *, llvm::Function *> Reduction =
5369               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5370           RValue Func = RValue::get(Reduction.first);
5371           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5372           CGF.EmitIgnoredExpr(ReductionOp);
5373           return;
5374         }
5375   CGF.EmitIgnoredExpr(ReductionOp);
5376 }
5377 
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  // The function is emitted with internal linkage; a unique name keeps it from
  // clashing with other reduction functions in the module.
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Remap every LHS/RHS variable to the matching slot of the void* arrays
  // passed in the arguments so the combiner expressions below see them.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // The following slot of the pointer array carries the element count;
      // rematerialize the VLA size from it before using the type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // Emit a combiner for each reduction item, iterating the three expression
  // lists in lockstep.
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5469 
5470 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5471                                                   const Expr *ReductionOp,
5472                                                   const Expr *PrivateRef,
5473                                                   const DeclRefExpr *LHS,
5474                                                   const DeclRefExpr *RHS) {
5475   if (PrivateRef->getType()->isArrayType()) {
5476     // Emit reduction for array section.
5477     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5478     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5479     EmitOMPAggregateReduction(
5480         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5481         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5482           emitReductionCombiner(CGF, ReductionOp);
5483         });
5484   } else {
5485     // Emit reduction for array subscript or single variable.
5486     emitReductionCombiner(CGF, ReductionOp);
5487   }
5488 }
5489 
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime coordination needed: emit the combiner for each reduction
    // item directly into the current function.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  // The location ident is emitted with the OMP_ATOMIC_REDUCE flag set.
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  // Non-atomic path: the runtime has granted exclusive access, so each
  // combiner can be emitted as a plain (non-atomic) operation.
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // For an 'x = <update>' assignment, decompose it into the target (XExpr)
      // and update (UpExpr) parts so it can be tried as a simple atomic.
      // NOTE: the inner 'BO' below intentionally shadows the opcode variable.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // When the update cannot be lowered as a single atomic RMW,
                // store the loaded value into a temporary remapped as VD and
                // re-evaluate the update expression against it.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5796 
5797 /// Generates unique name for artificial threadprivate variables.
5798 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5799 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5800                                       const Expr *Ref) {
5801   SmallString<256> Buffer;
5802   llvm::raw_svector_ostream Out(Buffer);
5803   const clang::DeclRefExpr *DE;
5804   const VarDecl *D = ::getBaseDecl(Ref, DE);
5805   if (!D)
5806     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5807   D = D->getCanonicalDecl();
5808   std::string Name = CGM.getOpenMPRuntime().getName(
5809       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5810   Out << Prefix << Name << "_"
5811       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5812   return std::string(Out.str());
5813 }
5814 
5815 /// Emits reduction initializer function:
5816 /// \code
5817 /// void @.red_init(void* %arg, void* %orig) {
5818 /// %0 = bitcast void* %arg to <type>*
5819 /// store <type> <init>, <type>* %0
5820 /// ret void
5821 /// }
5822 /// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // Both parameters are 'void * restrict'.
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Address of the private copy to be initialized (the %arg parameter).
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue OrigLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer)
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // Initializer does not need the original item - pass a null lvalue.
    OrigLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
5883 
5884 /// Emits reduction combiner function:
5885 /// \code
5886 /// void @.red_comb(void* %arg0, void* %arg1) {
5887 /// %lhs = bitcast void* %arg0 to <type>*
5888 /// %rhs = bitcast void* %arg1 to <type>*
5889 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5890 /// store <type> %2, <type>* %lhs
5891 /// ret void
5892 /// }
5893 /// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  // The lhs/rhs variables referenced by the combiner expression.
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
5961 
5962 /// Emits reduction finalizer function:
5963 /// \code
5964 /// void @.red_fini(void* %arg) {
5965 /// %0 = bitcast void* %arg to <type>*
5966 /// <destroy>(<type>* %0)
5967 /// ret void
5968 /// }
5969 /// \endcode
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // No cleanups required for this item - no finalizer function is needed.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Address of the private copy to be destroyed (the %arg parameter).
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}
6010 
/// Emits initialization of task reductions: builds an array of
/// kmp_taskred_input_t descriptors (one per reduction item) and passes it to
/// either __kmpc_taskred_modifier_init (reductions with a task modifier) or
/// __kmpc_taskred_init. Returns the taskgroup descriptor produced by the
/// runtime, or nullptr if there is nothing to emit.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  // Fill in one descriptor per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    // The finalizer is optional: null means no cleanups are needed.
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      // Flag value 1 requests delayed creation for VLAs/array sections (see
      // the DelayedCreation comment above).
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
6139 
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc,
  // int gtid, int is_ws);
  // (The previous comment incorrectly described __kmpc_taskred_modifier_init;
  // this function emits the matching finalization call.)
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  // is_ws is 1 for a worksharing-based reduction, 0 otherwise.
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}
6157 
6158 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6159                                               SourceLocation Loc,
6160                                               ReductionCodeGen &RCG,
6161                                               unsigned N) {
6162   auto Sizes = RCG.getSizes(N);
6163   // Emit threadprivate global variable if the type is non-constant
6164   // (Sizes.second = nullptr).
6165   if (Sizes.second) {
6166     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6167                                                      /*isSigned=*/false);
6168     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6169         CGF, CGM.getContext().getSizeType(),
6170         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6171     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6172   }
6173 }
6174 
6175 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6176                                               SourceLocation Loc,
6177                                               llvm::Value *ReductionsPtr,
6178                                               LValue SharedLVal) {
6179   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6180   // *d);
6181   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6182                                                    CGM.IntTy,
6183                                                    /*isSigned=*/true),
6184                          ReductionsPtr,
6185                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6186                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6187   return Address(
6188       CGF.EmitRuntimeCall(
6189           llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
6190               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6191           Args),
6192       SharedLVal.getAlignment());
6193 }
6194 
6195 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6196                                        SourceLocation Loc) {
6197   if (!CGF.HaveInsertPoint())
6198     return;
6199 
6200   llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
6201   if (OMPBuilder) {
6202     OMPBuilder->CreateTaskwait(CGF.Builder);
6203   } else {
6204     // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6205     // global_tid);
6206     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6207     // Ignore return result until untied tasks are supported.
6208     CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
6209                             CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
6210                         Args);
6211   }
6212 
6213   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6214     Region->emitUntiedSwitch(CGF);
6215 }
6216 
6217 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6218                                            OpenMPDirectiveKind InnerKind,
6219                                            const RegionCodeGenTy &CodeGen,
6220                                            bool HasCancel) {
6221   if (!CGF.HaveInsertPoint())
6222     return;
6223   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
6224   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6225 }
6226 
namespace {
/// Cancellation kinds; the numeric values are passed directly as the
/// cncl_kind argument of the __kmpc_cancel / __kmpc_cancellationpoint
/// runtime calls.
enum RTCancelKind {
  CancelNoreq = 0,     // No cancellation requested.
  CancelParallel = 1,  // Cancel a 'parallel' region.
  CancelLoop = 2,      // Cancel a worksharing loop ('for') region.
  CancelSections = 3,  // Cancel a 'sections' region.
  CancelTaskgroup = 4  // Cancel a 'taskgroup' region.
};
} // anonymous namespace
6236 
6237 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6238   RTCancelKind CancelKind = CancelNoreq;
6239   if (CancelRegion == OMPD_parallel)
6240     CancelKind = CancelParallel;
6241   else if (CancelRegion == OMPD_for)
6242     CancelKind = CancelLoop;
6243   else if (CancelRegion == OMPD_sections)
6244     CancelKind = CancelSections;
6245   else {
6246     assert(CancelRegion == OMPD_taskgroup);
6247     CancelKind = CancelTaskgroup;
6248   }
6249   return CancelKind;
6250 }
6251 
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      // A non-zero result means cancellation was activated; branch out of the
      // construct through its cleanup scopes.
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
6288 
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // Capture the module by reference (not 'this') so the lambda does not
    // depend on the runtime object's lifetime.
    auto &&ThenGen = [&M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result =
          CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                                  M, OMPRTL___kmpc_cancel),
                              Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      // A non-zero result means cancellation was activated; branch out of the
      // construct through its cleanup scopes.
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    // With an if clause, the cancel call is emitted only on the true branch;
    // otherwise emit it unconditionally.
    if (IfCond) {
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6333 
6334 namespace {
6335 /// Cleanup action for uses_allocators support.
6336 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6337   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6338 
6339 public:
6340   OMPUsesAllocatorsActionTy(
6341       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6342       : Allocators(Allocators) {}
6343   void Enter(CodeGenFunction &CGF) override {
6344     if (!CGF.HaveInsertPoint())
6345       return;
6346     for (const auto &AllocatorData : Allocators) {
6347       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6348           CGF, AllocatorData.first, AllocatorData.second);
6349     }
6350   }
6351   void Exit(CodeGenFunction &CGF) override {
6352     if (!CGF.HaveInsertPoint())
6353       return;
6354     for (const auto &AllocatorData : Allocators) {
6355       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6356                                                         AllocatorData.first);
6357     }
6358   }
6359 };
6360 } // namespace
6361 
6362 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6363     const OMPExecutableDirective &D, StringRef ParentName,
6364     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6365     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6366   assert(!ParentName.empty() && "Invalid target region parent name!");
6367   HasEmittedTargetRegion = true;
6368   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6369   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6370     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6371       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6372       if (!D.AllocatorTraits)
6373         continue;
6374       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6375     }
6376   }
6377   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6378   CodeGen.setAction(UsesAllocatorAction);
6379   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6380                                    IsOffloadEntry, CodeGen);
6381 }
6382 
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  // Emits: <allocator> = __kmpc_init_allocator(gtid, memspace, ntraits,
  // traits) for a user-declared allocator from a uses_allocators clause.
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // The number of traits is the constant array bound of the traits
  // expression's type.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  // Reinterpret the traits array address as void** so it can be passed to the
  // runtime as an opaque pointer.
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  // Emit the allocator variable declaration, then convert the runtime's void*
  // handle to the declared allocator type and store it.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}
6417 
6418 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6419                                              const Expr *Allocator) {
6420   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6421   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6422   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6423   llvm::Value *AllocatorVal =
6424       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6425   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6426                                           CGF.getContext().VoidPtrTy,
6427                                           Allocator->getExprLoc());
6428   (void)CGF.EmitRuntimeCall(
6429       llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
6430           CGM.getModule(), OMPRTL___kmpc_destroy_allocator),
6431       {ThreadId, AllocatorVal});
6432 }
6433 
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Outline the captured target region body into EntryFnName.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also mark
  // the outlined function to have external linkage in case we are emitting code
  // for the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
  } else {
    // On the host, a unique global byte serves as the region ID.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}
6500 
6501 /// Checks if the expression is constant or does not have non-trivial function
6502 /// calls.
6503 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6504   // We can skip constant expressions.
6505   // We can skip expressions with trivial calls or simple expressions.
6506   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6507           !E->hasNonTrivialCall(Ctx)) &&
6508          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6509 }
6510 
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  // Walk through nested compound statements, ignoring statements that cannot
  // affect the analysis, until either exactly one "real" child remains (which
  // is returned) or more than one is found (in which case nullptr is
  // returned).
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      // Trivial expressions (constants, no non-trivial calls, no side
      // effects) can be ignored.
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        // A declaration statement can be ignored if every declaration in it is
        // harmless: type/pragma/OpenMP metadata declarations, or variables
        // that are constexpr or of trivial/reference type with a trivial (or
        // absent) initializer.
        if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->isConstexpr() ||
                     ((VD->getType().isTrivialType(Ctx) ||
                       VD->getType()->isReferenceType()) &&
                      (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    // Descend into the single remaining child, stripping containers again.
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6555 
6556 /// Emit the number of teams for a target directive.  Inspect the num_teams
6557 /// clause associated with a teams construct combined or closely nested
6558 /// with the target directive.
6559 ///
6560 /// Emit a team of size one for directives such as 'target parallel' that
6561 /// have no associated teams construct.
6562 ///
6563 /// Otherwise, return nullptr.
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // For a plain 'target', inspect the single nested directive (if any) to
    // decide the number of teams.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        // Nested 'teams': use its num_teams clause if present, otherwise let
        // the runtime decide (0).
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        return Bld.getInt32(0);
      }
      // Nested 'parallel'/simd: one team.
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      return Bld.getInt32(0);
    }
    // No single nested directive could be identified.
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target+teams directives: read the num_teams clause directly,
    // or 0 to let the runtime decide.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // Target directives with no teams construct: a single team.
    return Bld.getInt32(1);
  // The remaining directive kinds are not target-based executable directives
  // and are rejected by the assertion above; listing them keeps the switch
  // fully covered.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
6686 
/// Compute the number of threads to use for a parallel region nested directly
/// inside the captured statement \p CS of a target construct, honoring the
/// nested region's 'if' and 'num_threads' clauses.
/// \param DefaultThreadLimitVal Previously computed thread_limit value used to
/// cap the result, or null if there is none.
/// \return Constant 1 if the nested region is serialized ('if' clause folds to
/// false) or is a simd region; the (possibly capped) num_threads value for a
/// nested parallel region; \p DefaultThreadLimitVal (which may be null) for
/// any other nested directive; otherwise \p DefaultThreadLimitVal or a
/// constant 0 meaning "use the runtime default".
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  // Look through the captured statement (skipping wrapping captures and
  // compounds) for a single nested OpenMP directive.
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        // Evaluate the clause expression in the context of the enclosing
        // captured statement so captured variables are resolved correctly.
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        // Pick the 'if' clause that applies to the parallel region: either an
        // unmodified clause or one with the 'parallel' name modifier.
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Condition folds to a constant: a false condition serializes the
            // region, i.e. exactly one thread.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Emit any pre-init declarations the condition depends on before
            // evaluating it at runtime.
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  // Variable marked capture-no-init: allocate storage and
                  // register cleanups without emitting the initializer.
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        // Emit pre-init declarations the num_threads expression depends on.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        // Zero-extend to i32 for the runtime interface.
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Cap at the enclosing thread_limit:
        // min(DefaultThreadLimitVal, NumThreads) (unsigned compare).
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        // No num_threads clause: fall back to the thread_limit value, or 0
        // ("use the default") if there is none.
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        // <cond> ? NumThreads : 1 (serialized region runs on one thread).
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    // A simd region is executed by a single thread.
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  // No nested directive found: use the thread_limit if available, otherwise
  // let the runtime choose (0).
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}
6778 
6779 /// Emit the number of threads for a target directive.  Inspect the
6780 /// thread_limit clause associated with a teams construct combined or closely
6781 /// nested with the target directive.
6782 ///
6783 /// Emit the num_threads clause for directives such as 'target parallel' that
6784 /// have no associated teams construct.
6785 ///
6786 /// Otherwise, return nullptr.
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  // Value of an applicable thread_limit clause, if any (emitted lazily below).
  llvm::Value *ThreadLimitVal = nullptr;
  // Value of an applicable num_threads clause, if any.
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': the interesting clauses live on directives nested
    // inside the captured region, so walk down looking for them.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // Emit a nested thread_limit clause, if present, evaluating its
      // pre-init declarations first.
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              // Capture-no-init: allocate and register cleanups only.
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // For a nested teams construct (that is not also a distribute), step
      // inside it to find a nested distribute or parallel region.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      // A nested distribute (non-simd) region: recurse to pick up its nested
      // parallel region's clauses.
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      // A nested simd region executes on a single thread.
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    // thread_limit may appear directly on the combined directive.
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // Look one level deeper through a nested 'distribute' region.
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined constructs with a parallel region: all relevant clauses sit on
    // the directive itself.
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      // Pick the 'if' clause that applies to 'parallel'.
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Constant-false 'if' serializes the region: one thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Combine both clauses: min(num_threads, thread_limit), unsigned.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    // No clause at all: 0 means "use the runtime default".
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    // Apply a runtime-evaluated 'if' condition: serialized regions get 1.
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    // simd regions execute on a single thread.
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    // None of these are target execution directives; the entry assertion
    // guarantees they cannot reach this function.
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7001 
7002 namespace {
7003 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7004 
7005 // Utility to handle information from clauses associated with a given
7006 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7007 // It provides a convenient interface to obtain the information and generate
7008 // code for that information.
7009 class MappableExprsHandler {
7010 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading. These bit values are part of the interface with the
  /// offloading runtime library, so they must not be renumbered casually.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class. The position of the field is computed by
    /// getFlagMemberOffset().
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7051 
7052   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7053   static unsigned getFlagMemberOffset() {
7054     unsigned Offset = 0;
7055     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7056          Remain = Remain >> 1)
7057       Offset++;
7058     return Offset;
7059   }
7060 
7061   /// Class that associates information with a base pointer to be passed to the
7062   /// runtime library.
7063   class BasePointerInfo {
7064     /// The base pointer.
7065     llvm::Value *Ptr = nullptr;
7066     /// The base declaration that refers to this device pointer, or null if
7067     /// there is none.
7068     const ValueDecl *DevPtrDecl = nullptr;
7069 
7070   public:
7071     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7072         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7073     llvm::Value *operator*() const { return Ptr; }
7074     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7075     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7076   };
7077 
  /// List of base-pointer entries (pointer plus optional device-pointer decl).
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  /// List of values generated per map entry (used for both section pointers
  /// and sizes).
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  /// List of map-type flag words, one per generated map entry.
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
7081 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Field index and address of the mapped member with the lowest index.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Field index and address of the mapped member with the highest index.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Address of the whole struct the mapped members belong to.
    Address Base = Address::invalid();
  };
7093 
7094 private:
  /// Information gathered for a single mappable expression: its component
  /// list, the map type and modifiers it was mapped with, whether the runtime
  /// must return a device pointer for it, and whether the map was implicit.
  /// (The previous comment here described device-pointer return kinds and did
  /// not match this struct.)
  struct MapInfo {
    // Components of the mappable expression (base, members, sections, ...).
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    // Map type from the originating clause (to/from/tofrom/alloc/...).
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    // Map-type modifiers (e.g. always, close) from the originating clause.
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    // True if the runtime should return the device pointer for this entry
    // (use_device_ptr handling).
    bool ReturnDevicePointer = false;
    // True if this map was generated implicitly rather than from an explicit
    // clause.
    bool IsImplicit = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
  };
7112 
  /// If use_device_ptr is used on a pointer which is a struct member and there
  /// is no map information about it, then emission of that entry is deferred
  /// until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    // Expression identifying the struct member the device pointer comes from.
    const Expr *IE = nullptr;
    // Declaration named in the use_device_ptr clause.
    const ValueDecl *VD = nullptr;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
        : IE(IE), VD(VD) {}
  };
7123 
7124   /// The target directive from where the mappable clauses were extracted. It
7125   /// is either a executable directive or a user-defined mapper directive.
7126   llvm::PointerUnion<const OMPExecutableDirective *,
7127                      const OMPDeclareMapperDecl *>
7128       CurDir;
7129 
7130   /// Function the directive is being generated for.
7131   CodeGenFunction &CGF;
7132 
7133   /// Set of all first private variables in the current directive.
7134   /// bool data is set to true if the variable is implicitly marked as
7135   /// firstprivate, false otherwise.
7136   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
7137 
7138   /// Map between device pointer declarations and their expression components.
7139   /// The key value for declarations in 'this' is null.
7140   llvm::DenseMap<
7141       const ValueDecl *,
7142       SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7143       DevPointersMap;
7144 
  /// Compute the size in bytes to be mapped for the expression \p E.
  /// Handles array shaping expressions, array sections (including forms with
  /// only a lower bound), and plain typed expressions; reference types are
  /// looked through.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      // Size = sizeof(pointee) * dim0 * dim1 * ...
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        // Convert each dimension to size_t before multiplying.
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and lower bound is
      // not specified too, that means we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLoc().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      // Compute the element size from the pointee or array element type.
      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLoc().isInvalid())
        return ElemSize;

      if (const Expr *LenExpr = OAE->getLength()) {
        // Size = length * sizeof(element), with the length widened to size_t.
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLoc().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      // Clamp to zero when the lower-bound offset is not smaller than the
      // base size, so the subtraction cannot wrap.
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    // Plain expression: the size of its (canonical) type.
    return CGF.getTypeSize(ExprTy);
  }
7219 
7220   /// Return the corresponding bits for a given map clause modifier. Add
7221   /// a flag marking the map as a pointer if requested. Add a flag marking the
7222   /// map as the first one of a series of maps that relate to the same map
7223   /// expression.
7224   OpenMPOffloadMappingFlags getMapTypeBits(
7225       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7226       bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
7227     OpenMPOffloadMappingFlags Bits =
7228         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7229     switch (MapType) {
7230     case OMPC_MAP_alloc:
7231     case OMPC_MAP_release:
7232       // alloc and release is the default behavior in the runtime library,  i.e.
7233       // if we don't pass any bits alloc/release that is what the runtime is
7234       // going to do. Therefore, we don't need to signal anything for these two
7235       // type modifiers.
7236       break;
7237     case OMPC_MAP_to:
7238       Bits |= OMP_MAP_TO;
7239       break;
7240     case OMPC_MAP_from:
7241       Bits |= OMP_MAP_FROM;
7242       break;
7243     case OMPC_MAP_tofrom:
7244       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7245       break;
7246     case OMPC_MAP_delete:
7247       Bits |= OMP_MAP_DELETE;
7248       break;
7249     case OMPC_MAP_unknown:
7250       llvm_unreachable("Unexpected map type!");
7251     }
7252     if (AddPtrFlag)
7253       Bits |= OMP_MAP_PTR_AND_OBJ;
7254     if (AddIsTargetParamFlag)
7255       Bits |= OMP_MAP_TARGET_PARAM;
7256     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7257         != MapModifiers.end())
7258       Bits |= OMP_MAP_ALWAYS;
7259     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7260         != MapModifiers.end())
7261       Bits |= OMP_MAP_CLOSE;
7262     return Bits;
7263   }
7264 
7265   /// Return true if the provided expression is a final array section. A
7266   /// final array section, is one whose length can't be proved to be one.
7267   bool isFinalArraySectionExpression(const Expr *E) const {
7268     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7269 
7270     // It is not an array section and therefore not a unity-size one.
7271     if (!OASE)
7272       return false;
7273 
7274     // An array section with no colon always refer to a single element.
7275     if (OASE->getColonLoc().isInvalid())
7276       return false;
7277 
7278     const Expr *Length = OASE->getLength();
7279 
7280     // If we don't have a length we have to check if the array has size 1
7281     // for this dimension. Also, we should always expect a length if the
7282     // base type is pointer.
7283     if (!Length) {
7284       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7285                              OASE->getBase()->IgnoreParenImpCasts())
7286                              .getCanonicalType();
7287       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7288         return ATy->getSize().getSExtValue() != 1;
7289       // If we don't have a constant dimension length, we have to consider
7290       // the current section as having any size, so it is not necessarily
7291       // unitary. If it happen to be unity size, that's user fault.
7292       return true;
7293     }
7294 
7295     // Check if the length evaluates to 1.
7296     Expr::EvalResult Result;
7297     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7298       return true; // Can have more that size 1.
7299 
7300     llvm::APSInt ConstLength = Result.Val.getInt();
7301     return ConstLength.getSExtValue() != 1;
7302   }
7303 
7304   /// Generate the base pointers, section pointers, sizes and map type
7305   /// bits for the provided map type, map modifier, and expression components.
7306   /// \a IsFirstComponent should be set to true if the provided set of
7307   /// components is the first associated with a capture.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType,
      ArrayRef<OpenMPMapModifierKind> MapModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
      StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
      bool IsImplicit,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = llvm::None) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (**), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    // When the variable is on a declare target link or in a to clause with
    // unified memory, a reference is needed to hold the host/device address
    // of the variable.
    bool RequiresReference = false;

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    // I is the component currently being processed; it may be advanced past
    // the base pointer component below so that a dereferenced pointer base is
    // not emitted as a separate entry.
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    Address BP = Address::invalid();
    // Classify the base expression so we know how to materialize its address.
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
    const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);

    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
    } else if (OAShE &&
               isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
      BP = Address(
          CGF.EmitScalarExpr(OAShE->getBase()),
          CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
            RequiresReference = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
          }
        }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());

        // We do not need to generate individual map information for the
        // pointer, it can be associated with the combined storage.
        ++I;
      }
    }

    // Track whether a component of the list should be marked as MEMBER_OF some
    // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
    // in a component list should be marked as MEMBER_OF, all subsequent entries
    // do not belong to the base struct. E.g.
    // struct S2 s;
    // s.ps->ps->ps->f[:]
    //   (1) (2) (3) (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
    // is the pointee of ps(2) which is not member of struct s, so it should not
    // be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a component
    // in the component list which is a member expression. Useful when we have a
    // pointer or a final array section, in which case it is the previous
    // component in the list which tells us whether we have a member expression.
    // E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells us
    // whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    for (; I != CE; ++I) {
      // If the current component is member of a struct (parent struct) mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME)
          ShouldBeMemberOf = true;
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section, is one whose length can't be proved to be one.
      bool IsFinalArraySection =
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      const auto *OAShE =
          dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
      // A pointer whose associated expression is wrapped in a unary or binary
      // operator is being dereferenced here, so it is not treated as a
      // standalone (non-dereferenced) pointer entry below.
      const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
      const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
      bool IsPointer =
          OAShE ||
          (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();
      bool IsNonDerefPointer = IsPointer && !UO && !BO;

      if (Next == CE || IsNonDerefPointer || IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
                isa<UnaryOperator>(Next->getAssociatedExpression()) ||
                isa<BinaryOperator>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        Address LB = Address::invalid();
        if (OAShE) {
          LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
                       CGF.getContext().getTypeAlignInChars(
                           OAShE->getBase()->getType()));
        } else {
          LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
                   .getAddress(CGF);
        }

        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the object
        // it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointer =
            IsPointer && EncounteredME &&
            (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
             EncounteredME);
        if (!OverlappedElements.empty()) {
          // Handle base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(Next == CE &&
                 "Expected last element for the overlapped elements.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on the
          // device.
          PartialStruct.LowestElem = {0, LB};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
                                                              CGF.VoidPtrTy),
              TypeSize.getQuantity() - 1);
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          // Emit data for non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapModifiers, IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false);
          // Start scanning non-overlapped regions from the base of the struct.
          LB = BP;
          llvm::Value *Size = nullptr;
          // Do bitcopy of all non-overlapped structure elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (MC.getAssociatedDeclaration()) {
                ComponentLB =
                    CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                        .getAddress(CGF);
                Size = CGF.Builder.CreatePtrDiff(
                    CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
                    CGF.EmitCastToVoidPtr(LB.getPointer()));
                break;
              }
            }
            BasePointers.push_back(BP.getPointer());
            Pointers.push_back(LB.getPointer());
            Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty,
                                                      /*isSigned=*/true));
            Types.push_back(Flags);
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
          }
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Size = CGF.Builder.CreatePtrDiff(
              CGF.EmitCastToVoidPtr(
                  CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
              CGF.EmitCastToVoidPtr(LB.getPointer()));
          Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          Types.push_back(Flags);
          // Everything for this component list was emitted above; stop.
          break;
        }
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        if (!IsMemberPointer) {
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags = getMapTypeBits(
              MapType, MapModifiers, IsImplicit,
              !IsExpressionFirstInfo || RequiresReference,
              IsCaptureFirstInfo && !RequiresReference);

          if (!IsExpressionFirstInfo) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
            // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
            if (IsPointer)
              Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
                         OMP_MAP_DELETE | OMP_MAP_CLOSE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
              // should be later updated with the correct value of MEMBER_OF.
              Flags |= OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          Types.push_back(Flags);
        }

        // If we have encountered a member expression so far, keep track of the
        // mapped member. If the parent is "*this", then the value declaration
        // is nullptr.
        if (EncounteredME) {
          const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this struct
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LB};
            PartialStruct.HighestElem = {FieldIndex, LB};
            PartialStruct.Base = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LB};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            PartialStruct.HighestElem = {FieldIndex, LB};
          }
        }

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
      }
    }
  }
7761 
7762   /// Return the adjusted map modifiers if the declaration a capture refers to
7763   /// appears in a first-private clause. This is expected to be used only with
7764   /// directives that start with 'target'.
7765   MappableExprsHandler::OpenMPOffloadMappingFlags
7766   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7767     assert(Cap.capturesVariable() && "Expected capture by reference only!");
7768 
7769     // A first private variable captured by reference will use only the
7770     // 'private ptr' and 'map to' flag. Return the right flags if the captured
7771     // declaration is known as first-private in this handler.
7772     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7773       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
7774           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
7775         return MappableExprsHandler::OMP_MAP_ALWAYS |
7776                MappableExprsHandler::OMP_MAP_TO;
7777       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7778         return MappableExprsHandler::OMP_MAP_TO |
7779                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
7780       return MappableExprsHandler::OMP_MAP_PRIVATE |
7781              MappableExprsHandler::OMP_MAP_TO;
7782     }
7783     return MappableExprsHandler::OMP_MAP_TO |
7784            MappableExprsHandler::OMP_MAP_FROM;
7785   }
7786 
7787   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
7788     // Rotate by getFlagMemberOffset() bits.
7789     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
7790                                                   << getFlagMemberOffset());
7791   }
7792 
7793   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
7794                                      OpenMPOffloadMappingFlags MemberOfFlag) {
7795     // If the entry is PTR_AND_OBJ but has not been marked with the special
7796     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
7797     // marked as MEMBER_OF.
7798     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
7799         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
7800       return;
7801 
7802     // Reset the placeholder value to prepare the flag for the assignment of the
7803     // proper MEMBER_OF value.
7804     Flags &= ~OMP_MAP_MEMBER_OF;
7805     Flags |= MemberOfFlag;
7806   }
7807 
7808   void getPlainLayout(const CXXRecordDecl *RD,
7809                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7810                       bool AsBase) const {
7811     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7812 
7813     llvm::StructType *St =
7814         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7815 
7816     unsigned NumElements = St->getNumElements();
7817     llvm::SmallVector<
7818         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7819         RecordLayout(NumElements);
7820 
7821     // Fill bases.
7822     for (const auto &I : RD->bases()) {
7823       if (I.isVirtual())
7824         continue;
7825       const auto *Base = I.getType()->getAsCXXRecordDecl();
7826       // Ignore empty bases.
7827       if (Base->isEmpty() || CGF.getContext()
7828                                  .getASTRecordLayout(Base)
7829                                  .getNonVirtualSize()
7830                                  .isZero())
7831         continue;
7832 
7833       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7834       RecordLayout[FieldIndex] = Base;
7835     }
7836     // Fill in virtual bases.
7837     for (const auto &I : RD->vbases()) {
7838       const auto *Base = I.getType()->getAsCXXRecordDecl();
7839       // Ignore empty bases.
7840       if (Base->isEmpty())
7841         continue;
7842       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7843       if (RecordLayout[FieldIndex])
7844         continue;
7845       RecordLayout[FieldIndex] = Base;
7846     }
7847     // Fill in all the fields.
7848     assert(!RD->isUnion() && "Unexpected union.");
7849     for (const auto *Field : RD->fields()) {
7850       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
7851       // will fill in later.)
7852       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
7853         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
7854         RecordLayout[FieldIndex] = Field;
7855       }
7856     }
7857     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
7858              &Data : RecordLayout) {
7859       if (Data.isNull())
7860         continue;
7861       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
7862         getPlainLayout(Base, Layout, /*AsBase=*/true);
7863       else
7864         Layout.push_back(Data.get<const FieldDecl *>());
7865     }
7866   }
7867 
7868 public:
7869   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
7870       : CurDir(&Dir), CGF(CGF) {
7871     // Extract firstprivate clause information.
7872     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
7873       for (const auto *D : C->varlists())
7874         FirstPrivateDecls.try_emplace(
7875             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
7876     // Extract implicit firstprivates from uses_allocators clauses.
7877     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
7878       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
7879         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
7880         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
7881           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
7882                                         /*Implicit=*/true);
7883         else if (const auto *VD = dyn_cast<VarDecl>(
7884                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
7885                          ->getDecl()))
7886           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
7887       }
7888     }
7889     // Extract device pointer clause information.
7890     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
7891       for (auto L : C->component_lists())
7892         DevPointersMap[L.first].push_back(L.second);
7893   }
7894 
  /// Constructor for the declare mapper directive. No clause pre-processing
  /// happens here; only the current directive and CodeGenFunction are
  /// recorded.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
7898 
7899   /// Generate code for the combined entry if we have a partially mapped struct
7900   /// and take care of the mapping flags of the arguments corresponding to
7901   /// individual struct members.
7902   void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
7903                          MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7904                          MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
7905                          const StructRangeInfoTy &PartialStruct) const {
7906     // Base is the base of the struct
7907     BasePointers.push_back(PartialStruct.Base.getPointer());
7908     // Pointer is the address of the lowest element
7909     llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
7910     Pointers.push_back(LB);
7911     // Size is (addr of {highest+1} element) - (addr of lowest element)
7912     llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
7913     llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
7914     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
7915     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
7916     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
7917     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
7918                                                   /*isSigned=*/false);
7919     Sizes.push_back(Size);
7920     // Map type is always TARGET_PARAM
7921     Types.push_back(OMP_MAP_TARGET_PARAM);
7922     // Remove TARGET_PARAM flag from the first element
7923     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
7924 
7925     // All other current entries will be MEMBER_OF the combined entry
7926     // (except for PTR_AND_OBJ entries which do not have a placeholder value
7927     // 0xFFFF in the MEMBER_OF field).
7928     OpenMPOffloadMappingFlags MemberOfFlag =
7929         getMemberOfFlag(BasePointers.size() - 1);
7930     for (auto &M : CurTypes)
7931       setCorrectMemberOfFlag(M, MemberOfFlag);
7932   }
7933 
7934   /// Generate all the base pointers, section pointers, sizes and map
7935   /// types for the extracted mappable expressions. Also, for each item that
7936   /// relates with a device pointer, a pair of the relevant declaration and
7937   /// index where it occurs is appended to the device pointers info array.
7938   void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
7939                        MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7940                        MapFlagsArrayTy &Types) const {
7941     // We have to process the component lists that relate with the same
7942     // declaration in a single chunk so that we can generate the map flags
7943     // correctly. Therefore, we organize all lists in a map.
7944     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
7945 
7946     // Helper function to fill the information map for the different supported
7947     // clauses.
7948     auto &&InfoGen = [&Info](
7949         const ValueDecl *D,
7950         OMPClauseMappableExprCommon::MappableExprComponentListRef L,
7951         OpenMPMapClauseKind MapType,
7952         ArrayRef<OpenMPMapModifierKind> MapModifiers,
7953         bool ReturnDevicePointer, bool IsImplicit) {
7954       const ValueDecl *VD =
7955           D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
7956       Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
7957                             IsImplicit);
7958     };
7959 
7960     assert(CurDir.is<const OMPExecutableDirective *>() &&
7961            "Expect a executable directive");
7962     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
7963     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
7964       for (const auto L : C->component_lists()) {
7965         InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
7966             /*ReturnDevicePointer=*/false, C->isImplicit());
7967       }
7968     for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
7969       for (const auto L : C->component_lists()) {
7970         InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
7971             /*ReturnDevicePointer=*/false, C->isImplicit());
7972       }
7973     for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
7974       for (const auto L : C->component_lists()) {
7975         InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
7976             /*ReturnDevicePointer=*/false, C->isImplicit());
7977       }
7978 
7979     // Look at the use_device_ptr clause information and mark the existing map
7980     // entries as such. If there is no map information for an entry in the
7981     // use_device_ptr list, we create one with map type 'alloc' and zero size
7982     // section. It is the user fault if that was not mapped before. If there is
7983     // no map information and the pointer is a struct member, then we defer the
7984     // emission of that entry until the whole struct has been processed.
7985     llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
7986         DeferredInfo;
7987 
7988     for (const auto *C :
7989          CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
7990       for (const auto L : C->component_lists()) {
7991         assert(!L.second.empty() && "Not expecting empty list of components!");
7992         const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
7993         VD = cast<ValueDecl>(VD->getCanonicalDecl());
7994         const Expr *IE = L.second.back().getAssociatedExpression();
7995         // If the first component is a member expression, we have to look into
7996         // 'this', which maps to null in the map of map information. Otherwise
7997         // look directly for the information.
7998         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
7999 
8000         // We potentially have map information for this declaration already.
8001         // Look for the first set of components that refer to it.
8002         if (It != Info.end()) {
8003           auto CI = std::find_if(
8004               It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
8005                 return MI.Components.back().getAssociatedDeclaration() == VD;
8006               });
8007           // If we found a map entry, signal that the pointer has to be returned
8008           // and move on to the next declaration.
8009           if (CI != It->second.end()) {
8010             CI->ReturnDevicePointer = true;
8011             continue;
8012           }
8013         }
8014 
8015         // We didn't find any match in our map information - generate a zero
8016         // size array section - if the pointer is a struct member we defer this
8017         // action until the whole struct has been processed.
8018         if (isa<MemberExpr>(IE)) {
8019           // Insert the pointer into Info to be processed by
8020           // generateInfoForComponentList. Because it is a member pointer
8021           // without a pointee, no entry will be generated for it, therefore
8022           // we need to generate one after the whole struct has been processed.
8023           // Nonetheless, generateInfoForComponentList must be called to take
8024           // the pointer into account for the calculation of the range of the
8025           // partial struct.
8026           InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
8027                   /*ReturnDevicePointer=*/false, C->isImplicit());
8028           DeferredInfo[nullptr].emplace_back(IE, VD);
8029         } else {
8030           llvm::Value *Ptr =
8031               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8032           BasePointers.emplace_back(Ptr, VD);
8033           Pointers.push_back(Ptr);
8034           Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8035           Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
8036         }
8037       }
8038     }
8039 
8040     for (const auto &M : Info) {
8041       // We need to know when we generate information for the first component
8042       // associated with a capture, because the mapping flags depend on it.
8043       bool IsFirstComponentList = true;
8044 
8045       // Temporary versions of arrays
8046       MapBaseValuesArrayTy CurBasePointers;
8047       MapValuesArrayTy CurPointers;
8048       MapValuesArrayTy CurSizes;
8049       MapFlagsArrayTy CurTypes;
8050       StructRangeInfoTy PartialStruct;
8051 
8052       for (const MapInfo &L : M.second) {
8053         assert(!L.Components.empty() &&
8054                "Not expecting declaration with no component lists.");
8055 
8056         // Remember the current base pointer index.
8057         unsigned CurrentBasePointersIdx = CurBasePointers.size();
8058         generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
8059                                      CurBasePointers, CurPointers, CurSizes,
8060                                      CurTypes, PartialStruct,
8061                                      IsFirstComponentList, L.IsImplicit);
8062 
8063         // If this entry relates with a device pointer, set the relevant
8064         // declaration and add the 'return pointer' flag.
8065         if (L.ReturnDevicePointer) {
8066           assert(CurBasePointers.size() > CurrentBasePointersIdx &&
8067                  "Unexpected number of mapped base pointers.");
8068 
8069           const ValueDecl *RelevantVD =
8070               L.Components.back().getAssociatedDeclaration();
8071           assert(RelevantVD &&
8072                  "No relevant declaration related with device pointer??");
8073 
8074           CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
8075           CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8076         }
8077         IsFirstComponentList = false;
8078       }
8079 
8080       // Append any pending zero-length pointers which are struct members and
8081       // used with use_device_ptr.
8082       auto CI = DeferredInfo.find(M.first);
8083       if (CI != DeferredInfo.end()) {
8084         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8085           llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8086           llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
8087               this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
8088           CurBasePointers.emplace_back(BasePtr, L.VD);
8089           CurPointers.push_back(Ptr);
8090           CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
8091           // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
8092           // value MEMBER_OF=FFFF so that the entry is later updated with the
8093           // correct value of MEMBER_OF.
8094           CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8095                              OMP_MAP_MEMBER_OF);
8096         }
8097       }
8098 
8099       // If there is an entry in PartialStruct it means we have a struct with
8100       // individual members mapped. Emit an extra combined entry.
8101       if (PartialStruct.Base.isValid())
8102         emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8103                           PartialStruct);
8104 
8105       // We need to append the results of this capture to what we already have.
8106       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8107       Pointers.append(CurPointers.begin(), CurPointers.end());
8108       Sizes.append(CurSizes.begin(), CurSizes.end());
8109       Types.append(CurTypes.begin(), CurTypes.end());
8110     }
8111   }
8112 
8113   /// Generate all the base pointers, section pointers, sizes and map types for
8114   /// the extracted map clauses of user-defined mapper.
8115   void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers,
8116                                 MapValuesArrayTy &Pointers,
8117                                 MapValuesArrayTy &Sizes,
8118                                 MapFlagsArrayTy &Types) const {
8119     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8120            "Expect a declare mapper directive");
8121     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8122     // We have to process the component lists that relate with the same
8123     // declaration in a single chunk so that we can generate the map flags
8124     // correctly. Therefore, we organize all lists in a map.
8125     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
8126 
8127     // Helper function to fill the information map for the different supported
8128     // clauses.
8129     auto &&InfoGen = [&Info](
8130         const ValueDecl *D,
8131         OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8132         OpenMPMapClauseKind MapType,
8133         ArrayRef<OpenMPMapModifierKind> MapModifiers,
8134         bool ReturnDevicePointer, bool IsImplicit) {
8135       const ValueDecl *VD =
8136           D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
8137       Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
8138                             IsImplicit);
8139     };
8140 
8141     for (const auto *C : CurMapperDir->clauselists()) {
8142       const auto *MC = cast<OMPMapClause>(C);
8143       for (const auto L : MC->component_lists()) {
8144         InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(),
8145                 /*ReturnDevicePointer=*/false, MC->isImplicit());
8146       }
8147     }
8148 
8149     for (const auto &M : Info) {
8150       // We need to know when we generate information for the first component
8151       // associated with a capture, because the mapping flags depend on it.
8152       bool IsFirstComponentList = true;
8153 
8154       // Temporary versions of arrays
8155       MapBaseValuesArrayTy CurBasePointers;
8156       MapValuesArrayTy CurPointers;
8157       MapValuesArrayTy CurSizes;
8158       MapFlagsArrayTy CurTypes;
8159       StructRangeInfoTy PartialStruct;
8160 
8161       for (const MapInfo &L : M.second) {
8162         assert(!L.Components.empty() &&
8163                "Not expecting declaration with no component lists.");
8164         generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
8165                                      CurBasePointers, CurPointers, CurSizes,
8166                                      CurTypes, PartialStruct,
8167                                      IsFirstComponentList, L.IsImplicit);
8168         IsFirstComponentList = false;
8169       }
8170 
8171       // If there is an entry in PartialStruct it means we have a struct with
8172       // individual members mapped. Emit an extra combined entry.
8173       if (PartialStruct.Base.isValid())
8174         emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8175                           PartialStruct);
8176 
8177       // We need to append the results of this capture to what we already have.
8178       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8179       Pointers.append(CurPointers.begin(), CurPointers.end());
8180       Sizes.append(CurSizes.begin(), CurSizes.end());
8181       Types.append(CurTypes.begin(), CurTypes.end());
8182     }
8183   }
8184 
8185   /// Emit capture info for lambdas for variables captured by reference.
8186   void generateInfoForLambdaCaptures(
8187       const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
8188       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
8189       MapFlagsArrayTy &Types,
8190       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8191     const auto *RD = VD->getType()
8192                          .getCanonicalType()
8193                          .getNonReferenceType()
8194                          ->getAsCXXRecordDecl();
8195     if (!RD || !RD->isLambda())
8196       return;
8197     Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
8198     LValue VDLVal = CGF.MakeAddrLValue(
8199         VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
8200     llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
8201     FieldDecl *ThisCapture = nullptr;
8202     RD->getCaptureFields(Captures, ThisCapture);
8203     if (ThisCapture) {
8204       LValue ThisLVal =
8205           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8206       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8207       LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8208                                  VDLVal.getPointer(CGF));
8209       BasePointers.push_back(ThisLVal.getPointer(CGF));
8210       Pointers.push_back(ThisLValVal.getPointer(CGF));
8211       Sizes.push_back(
8212           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8213                                     CGF.Int64Ty, /*isSigned=*/true));
8214       Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8215                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8216     }
8217     for (const LambdaCapture &LC : RD->captures()) {
8218       if (!LC.capturesVariable())
8219         continue;
8220       const VarDecl *VD = LC.getCapturedVar();
8221       if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8222         continue;
8223       auto It = Captures.find(VD);
8224       assert(It != Captures.end() && "Found lambda capture without field.");
8225       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8226       if (LC.getCaptureKind() == LCK_ByRef) {
8227         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8228         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8229                                    VDLVal.getPointer(CGF));
8230         BasePointers.push_back(VarLVal.getPointer(CGF));
8231         Pointers.push_back(VarLValVal.getPointer(CGF));
8232         Sizes.push_back(CGF.Builder.CreateIntCast(
8233             CGF.getTypeSize(
8234                 VD->getType().getCanonicalType().getNonReferenceType()),
8235             CGF.Int64Ty, /*isSigned=*/true));
8236       } else {
8237         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8238         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8239                                    VDLVal.getPointer(CGF));
8240         BasePointers.push_back(VarLVal.getPointer(CGF));
8241         Pointers.push_back(VarRVal.getScalarVal());
8242         Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8243       }
8244       Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8245                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8246     }
8247   }
8248 
8249   /// Set correct indices for lambdas captures.
8250   void adjustMemberOfForLambdaCaptures(
8251       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8252       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8253       MapFlagsArrayTy &Types) const {
8254     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8255       // Set correct member_of idx for all implicit lambda captures.
8256       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8257                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8258         continue;
8259       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8260       assert(BasePtr && "Unable to find base lambda address.");
8261       int TgtIdx = -1;
8262       for (unsigned J = I; J > 0; --J) {
8263         unsigned Idx = J - 1;
8264         if (Pointers[Idx] != BasePtr)
8265           continue;
8266         TgtIdx = Idx;
8267         break;
8268       }
8269       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8270       // All other current entries will be MEMBER_OF the combined entry
8271       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8272       // 0xFFFF in the MEMBER_OF field).
8273       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8274       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8275     }
8276   }
8277 
  /// Generate the base pointers, section pointers, sizes and map types
  /// associated to a given capture \a Cap (whose runtime value is \a Arg).
  /// Results are appended to \a BasePointers, \a Pointers, \a Sizes and
  /// \a Types. \a PartialStruct is populated when individual members of a
  /// struct are mapped; the caller is then responsible for emitting the
  /// combined entry.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg,
                              MapBaseValuesArrayTy &BasePointers,
                              MapValuesArrayTy &Pointers,
                              MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // The canonical declaration of the captured variable; nullptr stands for
    // a captured 'this'.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      BasePointers.emplace_back(Arg, VD);
      Pointers.push_back(Arg);
      // The "size" of a passed-by-value device pointer is the pointer width.
      Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
      return;
    }

    // One entry per component list of a map clause naming VD:
    // (components, map type, map-type modifiers, is-implicit).
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      for (const auto L : C->decl_component_lists(VD)) {
        assert(L.first == VD &&
               "We got information for the wrong declaration??");
        assert(!L.second.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(L.second, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit());
      }
    }

    // Find overlapping elements (including the offset from the base element).
    // Keyed by pointer into DeclComponentLists (stable: the vector is not
    // modified below), mapping each "base" list to the lists it overlaps.
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ++Count;
      // Compare L against every later list only (each unordered pair once).
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        // NOTE: only Components1 of L1 is needed here; MapType/MapModifiers/
        // IsImplicit are clobbered by this tie but are not read again in this
        // iteration.
        std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
        // Walk both component lists from the innermost component outwards,
        // stopping at the first place they diverge.
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head of
        // the components list.
        if (CI == CE || SI == SE) {
          assert((CI != CE || SI != SE) &&
                 "Unexpected full match of the mapping components.");
          // The exhausted (shorter) list is the base; the other is the
          // overlapping sub-mapping recorded against it.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    // The field order of the record type drives the sort, so fetch the plain
    // layout (base classes flattened for C++ records) first.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      if (const auto *CRD =
              VD->getType().getCanonicalType()->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      // Order overlapped lists by declaration order of the first field where
      // they diverge; shorter (more enclosing) lists come first.
      llvm::sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // Otherwise order by field position; within the same parent use
            // the field index, otherwise whichever field appears first in the
            // flattened layout.
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      bool IsFirstComponentList = true;
      generateInfoForComponentList(MapType, MapModifiers, Components,
                                   BasePointers, Pointers, Sizes, Types,
                                   PartialStruct, IsFirstComponentList,
                                   IsImplicit, OverlappedComponents);
    }
    // Go through other elements without overlapped elements.
    // If anything was emitted above, the first component list has already
    // been handled there.
    bool IsFirstComponentList = OverlappedData.empty();
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, Components,
                                     BasePointers, Pointers, Sizes, Types,
                                     PartialStruct, IsFirstComponentList,
                                     IsImplicit);
      // Cleared even for lists skipped above: they were emitted in the
      // overlapped loop.
      IsFirstComponentList = false;
    }
  }
8455 
8456   /// Generate the base pointers, section pointers, sizes and map types
8457   /// associated with the declare target link variables.
8458   void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
8459                                         MapValuesArrayTy &Pointers,
8460                                         MapValuesArrayTy &Sizes,
8461                                         MapFlagsArrayTy &Types) const {
8462     assert(CurDir.is<const OMPExecutableDirective *>() &&
8463            "Expect a executable directive");
8464     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8465     // Map other list items in the map clause which are not captured variables
8466     // but "declare target link" global variables.
8467     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8468       for (const auto L : C->component_lists()) {
8469         if (!L.first)
8470           continue;
8471         const auto *VD = dyn_cast<VarDecl>(L.first);
8472         if (!VD)
8473           continue;
8474         llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
8475             OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
8476         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8477             !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
8478           continue;
8479         StructRangeInfoTy PartialStruct;
8480         generateInfoForComponentList(
8481             C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
8482             Pointers, Sizes, Types, PartialStruct,
8483             /*IsFirstComponentList=*/true, C->isImplicit());
8484         assert(!PartialStruct.Base.isValid() &&
8485                "No partial structs for declare target link expected.");
8486       }
8487     }
8488   }
8489 
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV. Results are
  /// appended to \a CurBasePointers, \a CurPointers, \a CurSizes and
  /// \a CurMapTypes.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapBaseValuesArrayTy &CurBasePointers,
                              MapValuesArrayTy &CurPointers,
                              MapValuesArrayTy &CurSizes,
                              MapFlagsArrayTy &CurMapTypes) const {
    // Default-mapped captures are implicit unless FirstPrivateDecls records
    // otherwise for the captured variable (see the lookups below).
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // 'this': map the pointee object to and from the device.
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CurSizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CurMapTypes.push_back(OMP_MAP_LITERAL);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CurMapTypes.push_back(OMP_MAP_NONE);
        CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      // A firstprivate capture may carry an explicit implicitness flag.
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      // Size of the referenced object; also reused as the memcpy length for
      // the firstprivate-constant path below.
      CurSizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end() &&
          VD->getType().isConstant(CGF.getContext())) {
        // Firstprivate constant: obtain the address of a global copy
        // registered for the target region and initialize it from the
        // original variable.
        llvm::Constant *Addr =
            CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
        // Copy the value of the original variable to the new global copy.
        CGF.Builder.CreateMemCpy(
            CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
            Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
            CurSizes.back(), /*IsVolatile=*/false);
        // Use new global variable as the base pointers.
        CurBasePointers.push_back(Addr);
        CurPointers.push_back(Addr);
      } else {
        CurBasePointers.push_back(CV);
        if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
          // Firstprivate pointer: pass the pointee address, not the address
          // of the reference.
          Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
              CV, ElementType, CGF.getContext().getDeclAlign(VD),
              AlignmentSource::Decl));
          CurPointers.push_back(PtrAddr.getPointer());
        } else {
          CurPointers.push_back(CV);
        }
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CurMapTypes.back() |= OMP_MAP_IMPLICIT;
  }
8573 };
8574 } // anonymous namespace
8575 
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
///
/// \p BasePointers, \p Pointers, \p Sizes and \p MapTypes are parallel
/// arrays: entry I describes the base address, begin address, byte size and
/// map-type flag bits of the I-th mapped entity. The emitted arrays (stack
/// temporaries, or constant globals where the contents are compile-time
/// constant) are returned through \p Info.
static void
emitOffloadingArrays(CodeGenFunction &CGF,
                     MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
                     MappableExprsHandler::MapValuesArrayTy &Pointers,
                     MappableExprsHandler::MapValuesArrayTy &Sizes,
                     MappableExprsHandler::MapFlagsArrayTy &MapTypes,
                     CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    // Base and begin pointers always hold runtime values, so they are stack
    // temporaries filled in by the store loop below.
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (llvm::Value *S : Sizes)
        ConstSizes.push_back(cast<llvm::Constant>(S));

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
    llvm::copy(MapTypes, Mapping.begin());
    llvm::Constant *MapTypesArrayInit =
        llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
        CGM.getModule(), MapTypesArrayInit->getType(),
        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
        MapTypesArrayInit, MaptypesName);
    MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
    Info.MapTypesArray = MapTypesArrayGbl;

    // Fill the base-pointer, pointer and (if runtime-sized) size arrays:
    // one store per mapped entity.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      // Reinterpret the i8* slot as a pointer to BPVal's type so the store
      // type-checks without an extra value cast.
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Record where this declaration's base pointer was stored; presumably
      // consumed by device-pointer handling (e.g. use_device_ptr) through
      // CaptureDeviceAddrMap — confirm against callers.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        // Sizes are widened/narrowed to signed 64-bit regardless of the
        // host's size type.
        CGF.Builder.CreateStore(
            CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true),
            SAddr);
      }
    }
  }
}
8694 
8695 /// Emit the arguments to be passed to the runtime library based on the
8696 /// arrays of pointers, sizes and map types.
8697 static void emitOffloadingArraysArgument(
8698     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
8699     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
8700     llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
8701   CodeGenModule &CGM = CGF.CGM;
8702   if (Info.NumberOfPtrs) {
8703     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8704         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8705         Info.BasePointersArray,
8706         /*Idx0=*/0, /*Idx1=*/0);
8707     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8708         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8709         Info.PointersArray,
8710         /*Idx0=*/0,
8711         /*Idx1=*/0);
8712     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8713         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
8714         /*Idx0=*/0, /*Idx1=*/0);
8715     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8716         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8717         Info.MapTypesArray,
8718         /*Idx0=*/0,
8719         /*Idx1=*/0);
8720   } else {
8721     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8722     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8723     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8724     MapTypesArrayArg =
8725         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8726   }
8727 }
8728 
8729 /// Check for inner distribute directive.
8730 static const OMPExecutableDirective *
8731 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
8732   const auto *CS = D.getInnermostCapturedStmt();
8733   const auto *Body =
8734       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
8735   const Stmt *ChildStmt =
8736       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8737 
8738   if (const auto *NestedDir =
8739           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8740     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
8741     switch (D.getDirectiveKind()) {
8742     case OMPD_target:
8743       if (isOpenMPDistributeDirective(DKind))
8744         return NestedDir;
8745       if (DKind == OMPD_teams) {
8746         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
8747             /*IgnoreCaptured=*/true);
8748         if (!Body)
8749           return nullptr;
8750         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8751         if (const auto *NND =
8752                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8753           DKind = NND->getDirectiveKind();
8754           if (isOpenMPDistributeDirective(DKind))
8755             return NND;
8756         }
8757       }
8758       return nullptr;
8759     case OMPD_target_teams:
8760       if (isOpenMPDistributeDirective(DKind))
8761         return NestedDir;
8762       return nullptr;
8763     case OMPD_target_parallel:
8764     case OMPD_target_simd:
8765     case OMPD_target_parallel_for:
8766     case OMPD_target_parallel_for_simd:
8767       return nullptr;
8768     case OMPD_target_teams_distribute:
8769     case OMPD_target_teams_distribute_simd:
8770     case OMPD_target_teams_distribute_parallel_for:
8771     case OMPD_target_teams_distribute_parallel_for_simd:
8772     case OMPD_parallel:
8773     case OMPD_for:
8774     case OMPD_parallel_for:
8775     case OMPD_parallel_master:
8776     case OMPD_parallel_sections:
8777     case OMPD_for_simd:
8778     case OMPD_parallel_for_simd:
8779     case OMPD_cancel:
8780     case OMPD_cancellation_point:
8781     case OMPD_ordered:
8782     case OMPD_threadprivate:
8783     case OMPD_allocate:
8784     case OMPD_task:
8785     case OMPD_simd:
8786     case OMPD_sections:
8787     case OMPD_section:
8788     case OMPD_single:
8789     case OMPD_master:
8790     case OMPD_critical:
8791     case OMPD_taskyield:
8792     case OMPD_barrier:
8793     case OMPD_taskwait:
8794     case OMPD_taskgroup:
8795     case OMPD_atomic:
8796     case OMPD_flush:
8797     case OMPD_depobj:
8798     case OMPD_scan:
8799     case OMPD_teams:
8800     case OMPD_target_data:
8801     case OMPD_target_exit_data:
8802     case OMPD_target_enter_data:
8803     case OMPD_distribute:
8804     case OMPD_distribute_simd:
8805     case OMPD_distribute_parallel_for:
8806     case OMPD_distribute_parallel_for_simd:
8807     case OMPD_teams_distribute:
8808     case OMPD_teams_distribute_simd:
8809     case OMPD_teams_distribute_parallel_for:
8810     case OMPD_teams_distribute_parallel_for_simd:
8811     case OMPD_target_update:
8812     case OMPD_declare_simd:
8813     case OMPD_declare_variant:
8814     case OMPD_begin_declare_variant:
8815     case OMPD_end_declare_variant:
8816     case OMPD_declare_target:
8817     case OMPD_end_declare_target:
8818     case OMPD_declare_reduction:
8819     case OMPD_declare_mapper:
8820     case OMPD_taskloop:
8821     case OMPD_taskloop_simd:
8822     case OMPD_master_taskloop:
8823     case OMPD_master_taskloop_simd:
8824     case OMPD_parallel_master_taskloop:
8825     case OMPD_parallel_master_taskloop_simd:
8826     case OMPD_requires:
8827     case OMPD_unknown:
8828       llvm_unreachable("Unexpected directive.");
8829     }
8830   }
8831 
8832   return nullptr;
8833 }
8834 
/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type) {
///   // Allocate space for an array section first.
///   if (size > 1 && !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFrom(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFrom(type));
/// }
/// \endcode
///
/// \param D the 'declare mapper' declaration to emit a function for.
/// \param CGF if non-null, the function whose emission triggered this call;
///        the dependency is recorded in FunctionUDMMap.
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Emit each mapper function at most once; UDMMap caches the result.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  // The variable declared in the 'declare mapper' directive; it is privatized
  // below to refer to the current array element.
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes. The signature matches
  // the pseudo-code above: (handle, base, begin, size, type).
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // Name the function ".omp_mapper.<mangled type>.<mapper name>".
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
      CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block. PtrPHI tracks the current element; its backedge
  // incoming value (PtrNext) is added after the body is emitted.
  MapperCGF.EmitBlock(BodyBB);
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
    return MapperCGF
        .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
        .getAddress(MapperCGF);
  });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
  MappableExprsHandler::MapValuesArrayTy Pointers;
  MappableExprsHandler::MapValuesArrayTy Sizes;
  MappableExprsHandler::MapFlagsArrayTy MapTypes;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Pre-shift the component count into the MEMBER_OF bit-field position so it
  // can be added directly to a map type below.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Sizes[I];

    // Extract the MEMBER_OF field from the map type.
    llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
    MapperCGF.EmitBlock(MemberBB);
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]);
    llvm::Value *Member = MapperCGF.Builder.CreateAnd(
        OriMapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
    llvm::BasicBlock *MemberCombineBB =
        MapperCGF.createBasicBlock("omp.member.combine");
    llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
    llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
    MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
    // Add the number of pre-existing components to the MEMBER_OF field if it
    // is valid.
    MapperCGF.EmitBlock(MemberCombineBB);
    llvm::Value *CombinedMember =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
    // Do nothing if it is not a member of previous components.
    MapperCGF.EmitBlock(TypeBB);
    llvm::PHINode *MemberMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
    MemberMapType->addIncoming(OriMapType, MemberBB);
    MemberMapType->addIncoming(CombinedMember, MemberCombineBB);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    // Merge the four decay outcomes; the tofrom case arrives via ToElseBB
    // with MemberMapType unchanged.
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    // TODO: call the corresponding mapper function if a user-defined mapper is
    // associated with this map clause.
    // Call the runtime API __tgt_push_mapper_component to fill up the runtime
    // data structure.
    llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType};
    MapperCGF.EmitRuntimeCall(
        llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_push_mapper_component),
        OffloadingArgs);
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, BodyBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Cache the generated function and, when emission was triggered from within
  // another function, record the dependency for that function.
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
9115 
/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  // NOTE(review): SGE makes this "size >= 1", while the mapper pseudo-code in
  // emitUserDefinedMapper says "size > 1" — confirm the intended boundary.
  llvm::BasicBlock *IsDeleteBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"}));
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);

  // Evaluate if we are going to delete this section. The delete bit must be
  // clear for the init path and set for the delete path.
  MapperCGF.EmitBlock(IsDeleteBB);
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  if (IsInit) {
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
  MapperCGF.EmitRuntimeCall(
      llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}
9171 
9172 void CGOpenMPRuntime::emitTargetNumIterationsCall(
9173     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9174     llvm::Value *DeviceID,
9175     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9176                                      const OMPLoopDirective &D)>
9177         SizeEmitter) {
9178   OpenMPDirectiveKind Kind = D.getDirectiveKind();
9179   const OMPExecutableDirective *TD = &D;
9180   // Get nested teams distribute kind directive, if any.
9181   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
9182     TD = getNestedDistributeDirective(CGM.getContext(), D);
9183   if (!TD)
9184     return;
9185   const auto *LD = cast<OMPLoopDirective>(TD);
9186   auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF,
9187                                                      PrePostActionTy &) {
9188     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
9189       llvm::Value *Args[] = {DeviceID, NumIterations};
9190       CGF.EmitRuntimeCall(
9191           llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
9192               CGM.getModule(), OMPRTL___kmpc_push_target_tripcount),
9193           Args);
9194     }
9195   };
9196   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
9197 }
9198 
9199 void CGOpenMPRuntime::emitTargetCall(
9200     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9201     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
9202     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9203     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9204                                      const OMPLoopDirective &D)>
9205         SizeEmitter) {
9206   if (!CGF.HaveInsertPoint())
9207     return;
9208 
9209   assert(OutlinedFn && "Invalid outlined function!");
9210 
9211   const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
9212   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
9213   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
9214   auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
9215                                             PrePostActionTy &) {
9216     CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9217   };
9218   emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
9219 
9220   CodeGenFunction::OMPTargetDataInfo InputInfo;
9221   llvm::Value *MapTypesArray = nullptr;
9222   // Fill up the pointer arrays and transfer execution to the device.
9223   auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
9224                     &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
9225                     SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
9226     if (Device.getInt() == OMPC_DEVICE_ancestor) {
9227       // Reverse offloading is not supported, so just execute on the host.
9228       if (RequiresOuterTask) {
9229         CapturedVars.clear();
9230         CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9231       }
9232       emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
9233       return;
9234     }
9235 
9236     // On top of the arrays that were filled up, the target offloading call
9237     // takes as arguments the device id as well as the host pointer. The host
9238     // pointer is used by the runtime library to identify the current target
9239     // region, so it only has to be unique and not necessarily point to
9240     // anything. It could be the pointer to the outlined function that
9241     // implements the target region, but we aren't using that so that the
9242     // compiler doesn't need to keep that, and could therefore inline the host
9243     // function if proven worthwhile during optimization.
9244 
9245     // From this point on, we need to have an ID of the target region defined.
9246     assert(OutlinedFnID && "Invalid outlined function ID!");
9247 
9248     // Emit device ID if any.
9249     llvm::Value *DeviceID;
9250     if (Device.getPointer()) {
9251       assert((Device.getInt() == OMPC_DEVICE_unknown ||
9252               Device.getInt() == OMPC_DEVICE_device_num) &&
9253              "Expected device_num modifier.");
9254       llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
9255       DeviceID =
9256           CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
9257     } else {
9258       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9259     }
9260 
9261     // Emit the number of elements in the offloading arrays.
9262     llvm::Value *PointerNum =
9263         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
9264 
9265     // Return value of the runtime offloading call.
9266     llvm::Value *Return;
9267 
9268     llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
9269     llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
9270 
9271     // Emit tripcount for the target loop-based directive.
9272     emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
9273 
9274     bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
9275     // The target region is an outlined function launched by the runtime
9276     // via calls __tgt_target() or __tgt_target_teams().
9277     //
9278     // __tgt_target() launches a target region with one team and one thread,
9279     // executing a serial region.  This master thread may in turn launch
9280     // more threads within its team upon encountering a parallel region,
9281     // however, no additional teams can be launched on the device.
9282     //
9283     // __tgt_target_teams() launches a target region with one or more teams,
9284     // each with one or more threads.  This call is required for target
9285     // constructs such as:
9286     //  'target teams'
9287     //  'target' / 'teams'
9288     //  'target teams distribute parallel for'
9289     //  'target parallel'
9290     // and so on.
9291     //
9292     // Note that on the host and CPU targets, the runtime implementation of
9293     // these calls simply call the outlined function without forking threads.
9294     // The outlined functions themselves have runtime calls to
9295     // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
9296     // the compiler in emitTeamsCall() and emitParallelCall().
9297     //
9298     // In contrast, on the NVPTX target, the implementation of
9299     // __tgt_target_teams() launches a GPU kernel with the requested number
9300     // of teams and threads so no additional calls to the runtime are required.
9301     if (NumTeams) {
9302       // If we have NumTeams defined this means that we have an enclosed teams
9303       // region. Therefore we also expect to have NumThreads defined. These two
9304       // values should be defined in the presence of a teams directive,
9305       // regardless of having any clauses associated. If the user is using teams
9306       // but no clauses, these two values will be the default that should be
9307       // passed to the runtime library - a 32-bit integer with the value zero.
9308       assert(NumThreads && "Thread limit expression should be available along "
9309                            "with number of teams.");
9310       llvm::Value *OffloadingArgs[] = {DeviceID,
9311                                        OutlinedFnID,
9312                                        PointerNum,
9313                                        InputInfo.BasePointersArray.getPointer(),
9314                                        InputInfo.PointersArray.getPointer(),
9315                                        InputInfo.SizesArray.getPointer(),
9316                                        MapTypesArray,
9317                                        NumTeams,
9318                                        NumThreads};
9319       Return = CGF.EmitRuntimeCall(
9320           llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
9321               CGM.getModule(), HasNowait ? OMPRTL___tgt_target_teams_nowait
9322                                          : OMPRTL___tgt_target_teams),
9323           OffloadingArgs);
9324     } else {
9325       llvm::Value *OffloadingArgs[] = {DeviceID,
9326                                        OutlinedFnID,
9327                                        PointerNum,
9328                                        InputInfo.BasePointersArray.getPointer(),
9329                                        InputInfo.PointersArray.getPointer(),
9330                                        InputInfo.SizesArray.getPointer(),
9331                                        MapTypesArray};
9332       Return = CGF.EmitRuntimeCall(
9333           llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
9334               CGM.getModule(),
9335               HasNowait ? OMPRTL___tgt_target_nowait : OMPRTL___tgt_target),
9336           OffloadingArgs);
9337     }
9338 
9339     // Check the error code and execute the host version if required.
9340     llvm::BasicBlock *OffloadFailedBlock =
9341         CGF.createBasicBlock("omp_offload.failed");
9342     llvm::BasicBlock *OffloadContBlock =
9343         CGF.createBasicBlock("omp_offload.cont");
9344     llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
9345     CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
9346 
9347     CGF.EmitBlock(OffloadFailedBlock);
9348     if (RequiresOuterTask) {
9349       CapturedVars.clear();
9350       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9351     }
9352     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
9353     CGF.EmitBranch(OffloadContBlock);
9354 
9355     CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
9356   };
9357 
9358   // Notify that the host version must be executed.
9359   auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
9360                     RequiresOuterTask](CodeGenFunction &CGF,
9361                                        PrePostActionTy &) {
9362     if (RequiresOuterTask) {
9363       CapturedVars.clear();
9364       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9365     }
9366     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
9367   };
9368 
9369   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
9370                           &CapturedVars, RequiresOuterTask,
9371                           &CS](CodeGenFunction &CGF, PrePostActionTy &) {
9372     // Fill up the arrays with all the captured variables.
9373     MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
9374     MappableExprsHandler::MapValuesArrayTy Pointers;
9375     MappableExprsHandler::MapValuesArrayTy Sizes;
9376     MappableExprsHandler::MapFlagsArrayTy MapTypes;
9377 
9378     // Get mappable expression information.
9379     MappableExprsHandler MEHandler(D, CGF);
9380     llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
9381 
9382     auto RI = CS.getCapturedRecordDecl()->field_begin();
9383     auto CV = CapturedVars.begin();
9384     for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
9385                                               CE = CS.capture_end();
9386          CI != CE; ++CI, ++RI, ++CV) {
9387       MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
9388       MappableExprsHandler::MapValuesArrayTy CurPointers;
9389       MappableExprsHandler::MapValuesArrayTy CurSizes;
9390       MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
9391       MappableExprsHandler::StructRangeInfoTy PartialStruct;
9392 
9393       // VLA sizes are passed to the outlined region by copy and do not have map
9394       // information associated.
9395       if (CI->capturesVariableArrayType()) {
9396         CurBasePointers.push_back(*CV);
9397         CurPointers.push_back(*CV);
9398         CurSizes.push_back(CGF.Builder.CreateIntCast(
9399             CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
9400         // Copy to the device as an argument. No need to retrieve it.
9401         CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
9402                               MappableExprsHandler::OMP_MAP_TARGET_PARAM |
9403                               MappableExprsHandler::OMP_MAP_IMPLICIT);
9404       } else {
9405         // If we have any information in the map clause, we use it, otherwise we
9406         // just do a default mapping.
9407         MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
9408                                          CurSizes, CurMapTypes, PartialStruct);
9409         if (CurBasePointers.empty())
9410           MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
9411                                            CurPointers, CurSizes, CurMapTypes);
9412         // Generate correct mapping for variables captured by reference in
9413         // lambdas.
9414         if (CI->capturesVariable())
9415           MEHandler.generateInfoForLambdaCaptures(
9416               CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
9417               CurMapTypes, LambdaPointers);
9418       }
9419       // We expect to have at least an element of information for this capture.
9420       assert(!CurBasePointers.empty() &&
9421              "Non-existing map pointer for capture!");
9422       assert(CurBasePointers.size() == CurPointers.size() &&
9423              CurBasePointers.size() == CurSizes.size() &&
9424              CurBasePointers.size() == CurMapTypes.size() &&
9425              "Inconsistent map information sizes!");
9426 
9427       // If there is an entry in PartialStruct it means we have a struct with
9428       // individual members mapped. Emit an extra combined entry.
9429       if (PartialStruct.Base.isValid())
9430         MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
9431                                     CurMapTypes, PartialStruct);
9432 
9433       // We need to append the results of this capture to what we already have.
9434       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
9435       Pointers.append(CurPointers.begin(), CurPointers.end());
9436       Sizes.append(CurSizes.begin(), CurSizes.end());
9437       MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
9438     }
9439     // Adjust MEMBER_OF flags for the lambdas captures.
9440     MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
9441                                               Pointers, MapTypes);
9442     // Map other list items in the map clause which are not captured variables
9443     // but "declare target link" global variables.
9444     MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
9445                                                MapTypes);
9446 
9447     TargetDataInfo Info;
9448     // Fill up the arrays and create the arguments.
9449     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
9450     emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
9451                                  Info.PointersArray, Info.SizesArray,
9452                                  Info.MapTypesArray, Info);
9453     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
9454     InputInfo.BasePointersArray =
9455         Address(Info.BasePointersArray, CGM.getPointerAlign());
9456     InputInfo.PointersArray =
9457         Address(Info.PointersArray, CGM.getPointerAlign());
9458     InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
9459     MapTypesArray = Info.MapTypesArray;
9460     if (RequiresOuterTask)
9461       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
9462     else
9463       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
9464   };
9465 
9466   auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
9467                              CodeGenFunction &CGF, PrePostActionTy &) {
9468     if (RequiresOuterTask) {
9469       CodeGenFunction::OMPTargetDataInfo InputInfo;
9470       CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
9471     } else {
9472       emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
9473     }
9474   };
9475 
9476   // If we have a target function ID it means that we need to support
9477   // offloading, otherwise, just execute on the host. We need to execute on host
9478   // regardless of the conditional in the if clause if, e.g., the user do not
9479   // specify target triples.
9480   if (OutlinedFnID) {
9481     if (IfCond) {
9482       emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
9483     } else {
9484       RegionCodeGenTy ThenRCG(TargetThenGen);
9485       ThenRCG(CGF);
9486     }
9487   } else {
9488     RegionCodeGenTy ElseRCG(TargetElseGen);
9489     ElseRCG(CGF);
9490   }
9491 }
9492 
/// Recursively scan \p S looking for OpenMP target-execution directives and
/// emit a device function for each target region found. \p ParentName is the
/// mangled name of the enclosing host function/ctor/dtor and is part of the
/// unique name of each emitted kernel.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    // Device/file/line triple uniquely identifying this target region across
    // the compilation.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the directive-specific device-function emitter.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // None of the remaining directive kinds is a target-execution directive,
    // so reaching any of them here would mean the RequiresDeviceCodegen check
    // above and this switch disagree — a compiler bug.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // For any other executable directive, nested target regions can only live
  // in the associated statement; recurse into it directly.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(
        E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
9640 
9641 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9642   // If emitting code for the host, we do not process FD here. Instead we do
9643   // the normal code generation.
9644   if (!CGM.getLangOpts().OpenMPIsDevice) {
9645     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
9646       Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9647           OMPDeclareTargetDeclAttr::getDeviceType(FD);
9648       // Do not emit device_type(nohost) functions for the host.
9649       if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9650         return true;
9651     }
9652     return false;
9653   }
9654 
9655   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9656   // Try to detect target regions in the function.
9657   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
9658     StringRef Name = CGM.getMangledName(GD);
9659     scanForTargetRegionsFunctions(FD->getBody(), Name);
9660     Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9661         OMPDeclareTargetDeclAttr::getDeviceType(FD);
9662     // Do not emit device_type(nohost) functions for the host.
9663     if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9664       return true;
9665   }
9666 
9667   // Do not to emit function if it is not marked as declare target.
9668   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9669          AlreadyEmittedTargetDecls.count(VD) == 0;
9670 }
9671 
9672 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9673   if (!CGM.getLangOpts().OpenMPIsDevice)
9674     return false;
9675 
9676   // Check if there are Ctors/Dtors in this declaration and look for target
9677   // regions in it. We use the complete variant to produce the kernel name
9678   // mangling.
9679   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9680   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9681     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9682       StringRef ParentName =
9683           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9684       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9685     }
9686     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9687       StringRef ParentName =
9688           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9689       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9690     }
9691   }
9692 
9693   // Do not to emit variable if it is not marked as declare target.
9694   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9695       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9696           cast<VarDecl>(GD.getDecl()));
9697   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9698       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9699        HasRequiresUnifiedSharedMemory)) {
9700     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9701     return true;
9702   }
9703   return false;
9704 }
9705 
9706 llvm::Constant *
9707 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
9708                                                 const VarDecl *VD) {
9709   assert(VD->getType().isConstant(CGM.getContext()) &&
9710          "Expected constant variable.");
9711   StringRef VarName;
9712   llvm::Constant *Addr;
9713   llvm::GlobalValue::LinkageTypes Linkage;
9714   QualType Ty = VD->getType();
9715   SmallString<128> Buffer;
9716   {
9717     unsigned DeviceID;
9718     unsigned FileID;
9719     unsigned Line;
9720     getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
9721                              FileID, Line);
9722     llvm::raw_svector_ostream OS(Buffer);
9723     OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
9724        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
9725     VarName = OS.str();
9726   }
9727   Linkage = llvm::GlobalValue::InternalLinkage;
9728   Addr =
9729       getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
9730                                   getDefaultFirstprivateAddressSpace());
9731   cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
9732   CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
9733   CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
9734   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9735       VarName, Addr, VarSize,
9736       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
9737   return Addr;
9738 }
9739 
9740 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
9741                                                    llvm::Constant *Addr) {
9742   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
9743       !CGM.getLangOpts().OpenMPIsDevice)
9744     return;
9745   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9746       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9747   if (!Res) {
9748     if (CGM.getLangOpts().OpenMPIsDevice) {
9749       // Register non-target variables being emitted in device code (debug info
9750       // may cause this).
9751       StringRef VarName = CGM.getMangledName(VD);
9752       EmittedNonTargetVariables.try_emplace(VarName, Addr);
9753     }
9754     return;
9755   }
9756   // Register declare target variables.
9757   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
9758   StringRef VarName;
9759   CharUnits VarSize;
9760   llvm::GlobalValue::LinkageTypes Linkage;
9761 
9762   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9763       !HasRequiresUnifiedSharedMemory) {
9764     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9765     VarName = CGM.getMangledName(VD);
9766     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
9767       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
9768       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
9769     } else {
9770       VarSize = CharUnits::Zero();
9771     }
9772     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
9773     // Temp solution to prevent optimizations of the internal variables.
9774     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
9775       std::string RefName = getName({VarName, "ref"});
9776       if (!CGM.GetGlobalValue(RefName)) {
9777         llvm::Constant *AddrRef =
9778             getOrCreateInternalVariable(Addr->getType(), RefName);
9779         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
9780         GVAddrRef->setConstant(/*Val=*/true);
9781         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
9782         GVAddrRef->setInitializer(Addr);
9783         CGM.addCompilerUsedGlobal(GVAddrRef);
9784       }
9785     }
9786   } else {
9787     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
9788             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9789              HasRequiresUnifiedSharedMemory)) &&
9790            "Declare target attribute must link or to with unified memory.");
9791     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
9792       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
9793     else
9794       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9795 
9796     if (CGM.getLangOpts().OpenMPIsDevice) {
9797       VarName = Addr->getName();
9798       Addr = nullptr;
9799     } else {
9800       VarName = getAddrOfDeclareTargetVar(VD).getName();
9801       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
9802     }
9803     VarSize = CGM.getPointerSize();
9804     Linkage = llvm::GlobalValue::WeakAnyLinkage;
9805   }
9806 
9807   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9808       VarName, Addr, VarSize, Flags, Linkage);
9809 }
9810 
9811 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
9812   if (isa<FunctionDecl>(GD.getDecl()) ||
9813       isa<OMPDeclareReductionDecl>(GD.getDecl()))
9814     return emitTargetFunctions(GD);
9815 
9816   return emitTargetGlobalVariable(GD);
9817 }
9818 
9819 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
9820   for (const VarDecl *VD : DeferredGlobalVariables) {
9821     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9822         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9823     if (!Res)
9824       continue;
9825     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9826         !HasRequiresUnifiedSharedMemory) {
9827       CGM.EmitGlobal(VD);
9828     } else {
9829       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
9830               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9831                HasRequiresUnifiedSharedMemory)) &&
9832              "Expected link clause or to clause with unified memory.");
9833       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
9834     }
9835   }
9836 }
9837 
/// Hook for adjusting lambda-related data for a target-based directive.
/// The generic runtime performs no adjustment; only the precondition is
/// checked here. NOTE(review): presumably specialized device runtimes
/// override this with real work — confirm against the class declaration.
void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}
9843 
9844 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
9845   for (const OMPClause *Clause : D->clauselists()) {
9846     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
9847       HasRequiresUnifiedSharedMemory = true;
9848     } else if (const auto *AC =
9849                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
9850       switch (AC->getAtomicDefaultMemOrderKind()) {
9851       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
9852         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
9853         break;
9854       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
9855         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
9856         break;
9857       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
9858         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
9859         break;
9860       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
9861         break;
9862       }
9863     }
9864   }
9865 }
9866 
/// Return the default atomic ordering, as selected by a 'requires
/// atomic_default_mem_order' clause (see processRequiresDirective).
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
9870 
9871 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
9872                                                        LangAS &AS) {
9873   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
9874     return false;
9875   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
9876   switch(A->getAllocatorType()) {
9877   case OMPAllocateDeclAttr::OMPNullMemAlloc:
9878   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
9879   // Not supported, fallback to the default mem space.
9880   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
9881   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
9882   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
9883   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
9884   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
9885   case OMPAllocateDeclAttr::OMPConstMemAlloc:
9886   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
9887     AS = LangAS::Default;
9888     return true;
9889   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
9890     llvm_unreachable("Expected predefined allocator for the variables with the "
9891                      "static storage.");
9892   }
9893   return false;
9894 }
9895 
/// True if a 'requires unified_shared_memory' clause was seen in this
/// compilation (set by processRequiresDirective).
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
9899 
/// RAII constructor: on the device, save the current ShouldMarkAsGlobal flag
/// and clear it, temporarily disabling automatic declare-target marking. The
/// destructor restores the saved value.
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}
9908 
/// RAII destructor: restore the ShouldMarkAsGlobal flag saved by the
/// constructor (device compilation only, mirroring the constructor's guard).
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}
9913 
9914 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
9915   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
9916     return true;
9917 
9918   const auto *D = cast<FunctionDecl>(GD.getDecl());
9919   // Do not to emit function if it is marked as declare target as it was already
9920   // emitted.
9921   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
9922     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
9923       if (auto *F = dyn_cast_or_null<llvm::Function>(
9924               CGM.GetGlobalValue(CGM.getMangledName(GD))))
9925         return !F->isDeclaration();
9926       return false;
9927     }
9928     return true;
9929   }
9930 
9931   return !AlreadyEmittedTargetDecls.insert(D).second;
9932 }
9933 
/// Creates a global constructor-style function that registers the 'requires'
/// flags for this TU with the offloading runtime via
/// __tgt_register_requires(flags). Returns nullptr when no registration is
/// needed: no offload target triples, simd-only mode, device compilation, or
/// no target/declare-target regions and no offload entries.
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    // The registration function takes no arguments and returns void.
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    // Emit the body: a single call __tgt_register_requires(Flags).
    CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}
9975 
9976 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
9977                                     const OMPExecutableDirective &D,
9978                                     SourceLocation Loc,
9979                                     llvm::Function *OutlinedFn,
9980                                     ArrayRef<llvm::Value *> CapturedVars) {
9981   if (!CGF.HaveInsertPoint())
9982     return;
9983 
9984   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9985   CodeGenFunction::RunCleanupsScope Scope(CGF);
9986 
9987   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
9988   llvm::Value *Args[] = {
9989       RTLoc,
9990       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
9991       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
9992   llvm::SmallVector<llvm::Value *, 16> RealArgs;
9993   RealArgs.append(std::begin(Args), std::end(Args));
9994   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
9995 
9996   llvm::FunctionCallee RTLFn =
9997       llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
9998           CGM.getModule(), OMPRTL___kmpc_fork_teams);
9999   CGF.EmitRuntimeCall(RTLFn, RealArgs);
10000 }
10001 
10002 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10003                                          const Expr *NumTeams,
10004                                          const Expr *ThreadLimit,
10005                                          SourceLocation Loc) {
10006   if (!CGF.HaveInsertPoint())
10007     return;
10008 
10009   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10010 
10011   llvm::Value *NumTeamsVal =
10012       NumTeams
10013           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10014                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10015           : CGF.Builder.getInt32(0);
10016 
10017   llvm::Value *ThreadLimitVal =
10018       ThreadLimit
10019           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10020                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10021           : CGF.Builder.getInt32(0);
10022 
10023   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
10024   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10025                                      ThreadLimitVal};
10026   CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
10027                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10028                       PushNumTeamsArgs);
10029 }
10030 
/// Emits the runtime calls bracketing a 'target data' region:
/// __tgt_target_data_begin(...) before the body and __tgt_target_data_end(...)
/// after it, both guarded by the 'if' clause when present. \p Info is filled
/// with the offloading arrays by the opening call so the closing call can
/// reuse them.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MCHandler(D, CGF);
    MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any; otherwise use the "undefined device" sentinel so
    // the runtime picks the default device.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // __tgt_target_data_begin(device_id, num_ptrs, base_ptrs, ptrs, sizes,
    //                         map_types).
    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_target_data_begin),
                        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    // Reuse the arrays created by the opening call (stored in Info).
    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // __tgt_target_data_end(...) mirrors the begin call with the same arrays.
    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_target_data_end),
                        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
10159 
/// Emits the runtime call for a standalone target data directive:
/// 'target enter data', 'target exit data', or 'target update'. The call is
/// guarded by the 'if' clause when present, and is emitted either inline or
/// as a target task when 'depend' clauses are attached.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // InputInfo and MapTypesArray are filled by TargetThenGen below and read by
  // ThenGen (captured by reference), so they must outlive both lambdas.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo,
                    &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any; otherwise use the "undefined device" sentinel.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    llvm::Value *OffloadingArgs[] = {DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray};

    // Select the right runtime function call for each expected standalone
    // directive. The '_nowait' variants are used when a 'nowait' clause is
    // present.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait
                        : OMPRTL___tgt_target_data_begin;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait
                        : OMPRTL___tgt_target_data_end;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait
                        : OMPRTL___tgt_target_data_update;
      break;
    // All other directive kinds are rejected by the assertion at the top of
    // this function; the exhaustive list keeps -Wswitch verification intact.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            CGM.getModule(), RTLFn),
                        OffloadingArgs);
  };

  // Builds the offloading arrays from the map clauses and then dispatches
  // ThenGen either as part of a target task (with 'depend') or inline.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Publish the arrays through InputInfo/MapTypesArray for ThenGen.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (D.hasClausesOfKind<OMPDependClause>())
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
10324 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    /// Classification of the parameter; defaults to Vector.
    ParamKindTy Kind = Vector;
    /// Linear step (for Linear/LinearWithVarStride) or argument value; a
    /// value of 1 is treated as the default step and is not mangled.
    llvm::APSInt StrideOrArg;
    /// Requested alignment; users test it with `!!Alignment`, so a zero
    /// value means no alignment was specified.
    llvm::APSInt Alignment;
  };
} // namespace
10335 
10336 static unsigned evaluateCDTSize(const FunctionDecl *FD,
10337                                 ArrayRef<ParamAttrTy> ParamAttrs) {
10338   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10339   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10340   // of that clause. The VLEN value must be power of 2.
10341   // In other case the notion of the function`s "characteristic data type" (CDT)
10342   // is used to compute the vector length.
10343   // CDT is defined in the following order:
10344   //   a) For non-void function, the CDT is the return type.
10345   //   b) If the function has any non-uniform, non-linear parameters, then the
10346   //   CDT is the type of the first such parameter.
10347   //   c) If the CDT determined by a) or b) above is struct, union, or class
10348   //   type which is pass-by-value (except for the type that maps to the
10349   //   built-in complex data type), the characteristic data type is int.
10350   //   d) If none of the above three cases is applicable, the CDT is int.
10351   // The VLEN is then determined based on the CDT and the size of vector
10352   // register of that ISA for which current vector version is generated. The
10353   // VLEN is computed using the formula below:
10354   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
10355   // where vector register size specified in section 3.2.1 Registers and the
10356   // Stack Frame of original AMD64 ABI document.
10357   QualType RetType = FD->getReturnType();
10358   if (RetType.isNull())
10359     return 0;
10360   ASTContext &C = FD->getASTContext();
10361   QualType CDT;
10362   if (!RetType.isNull() && !RetType->isVoidType()) {
10363     CDT = RetType;
10364   } else {
10365     unsigned Offset = 0;
10366     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10367       if (ParamAttrs[Offset].Kind == Vector)
10368         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10369       ++Offset;
10370     }
10371     if (CDT.isNull()) {
10372       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10373         if (ParamAttrs[I + Offset].Kind == Vector) {
10374           CDT = FD->getParamDecl(I)->getType();
10375           break;
10376         }
10377       }
10378     }
10379   }
10380   if (CDT.isNull())
10381     CDT = C.IntTy;
10382   CDT = CDT->getCanonicalTypeUnqualified();
10383   if (CDT->isRecordType() || CDT->isUnionType())
10384     CDT = C.IntTy;
10385   return C.getTypeSize(CDT);
10386 }
10387 
10388 static void
10389 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10390                            const llvm::APSInt &VLENVal,
10391                            ArrayRef<ParamAttrTy> ParamAttrs,
10392                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
10393   struct ISADataTy {
10394     char ISA;
10395     unsigned VecRegSize;
10396   };
10397   ISADataTy ISAData[] = {
10398       {
10399           'b', 128
10400       }, // SSE
10401       {
10402           'c', 256
10403       }, // AVX
10404       {
10405           'd', 256
10406       }, // AVX2
10407       {
10408           'e', 512
10409       }, // AVX512
10410   };
10411   llvm::SmallVector<char, 2> Masked;
10412   switch (State) {
10413   case OMPDeclareSimdDeclAttr::BS_Undefined:
10414     Masked.push_back('N');
10415     Masked.push_back('M');
10416     break;
10417   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10418     Masked.push_back('N');
10419     break;
10420   case OMPDeclareSimdDeclAttr::BS_Inbranch:
10421     Masked.push_back('M');
10422     break;
10423   }
10424   for (char Mask : Masked) {
10425     for (const ISADataTy &Data : ISAData) {
10426       SmallString<256> Buffer;
10427       llvm::raw_svector_ostream Out(Buffer);
10428       Out << "_ZGV" << Data.ISA << Mask;
10429       if (!VLENVal) {
10430         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10431         assert(NumElts && "Non-zero simdlen/cdtsize expected");
10432         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10433       } else {
10434         Out << VLENVal;
10435       }
10436       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
10437         switch (ParamAttr.Kind){
10438         case LinearWithVarStride:
10439           Out << 's' << ParamAttr.StrideOrArg;
10440           break;
10441         case Linear:
10442           Out << 'l';
10443           if (ParamAttr.StrideOrArg != 1)
10444             Out << ParamAttr.StrideOrArg;
10445           break;
10446         case Uniform:
10447           Out << 'u';
10448           break;
10449         case Vector:
10450           Out << 'v';
10451           break;
10452         }
10453         if (!!ParamAttr.Alignment)
10454           Out << 'a' << ParamAttr.Alignment;
10455       }
10456       Out << '_' << Fn->getName();
10457       Fn->addFnAttr(Out.str());
10458     }
10459   }
10460 }
10461 
// These are the functions needed to mangle the names of the vector
// functions generated by the compiler, according to the rules defined
// in the "Vector Function ABI specification for AArch64", available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10467 
10468 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
10469 ///
10470 /// TODO: Need to implement the behavior for reference marked with a
10471 /// var or no linear modifiers (1.b in the section). For this, we
10472 /// need to extend ParamKindTy to support the linear modifiers.
10473 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10474   QT = QT.getCanonicalType();
10475 
10476   if (QT->isVoidType())
10477     return false;
10478 
10479   if (Kind == ParamKindTy::Uniform)
10480     return false;
10481 
10482   if (Kind == ParamKindTy::Linear)
10483     return false;
10484 
10485   // TODO: Handle linear references with modifiers
10486 
10487   if (Kind == ParamKindTy::LinearWithVarStride)
10488     return false;
10489 
10490   return true;
10491 }
10492 
10493 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10494 static bool getAArch64PBV(QualType QT, ASTContext &C) {
10495   QT = QT.getCanonicalType();
10496   unsigned Size = C.getTypeSize(QT);
10497 
10498   // Only scalars and complex within 16 bytes wide set PVB to true.
10499   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10500     return false;
10501 
10502   if (QT->isFloatingType())
10503     return true;
10504 
10505   if (QT->isIntegerType())
10506     return true;
10507 
10508   if (QT->isPointerType())
10509     return true;
10510 
10511   // TODO: Add support for complex types (section 3.1.2, item 2).
10512 
10513   return false;
10514 }
10515 
10516 /// Computes the lane size (LS) of a return type or of an input parameter,
10517 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10518 /// TODO: Add support for references, section 3.2.1, item 1.
10519 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10520   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10521     QualType PTy = QT.getCanonicalType()->getPointeeType();
10522     if (getAArch64PBV(PTy, C))
10523       return C.getTypeSize(PTy);
10524   }
10525   if (getAArch64PBV(QT, C))
10526     return C.getTypeSize(QT);
10527 
10528   return C.getTypeSize(C.getUIntPtrType());
10529 }
10530 
10531 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10532 // signature of the scalar function, as defined in 3.2.2 of the
10533 // AAVFABI.
10534 static std::tuple<unsigned, unsigned, bool>
10535 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10536   QualType RetType = FD->getReturnType().getCanonicalType();
10537 
10538   ASTContext &C = FD->getASTContext();
10539 
10540   bool OutputBecomesInput = false;
10541 
10542   llvm::SmallVector<unsigned, 8> Sizes;
10543   if (!RetType->isVoidType()) {
10544     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10545     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10546       OutputBecomesInput = true;
10547   }
10548   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10549     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10550     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10551   }
10552 
10553   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10554   // The LS of a function parameter / return value can only be a power
10555   // of 2, starting from 8 bits, up to 128.
10556   assert(std::all_of(Sizes.begin(), Sizes.end(),
10557                      [](unsigned Size) {
10558                        return Size == 8 || Size == 16 || Size == 32 ||
10559                               Size == 64 || Size == 128;
10560                      }) &&
10561          "Invalid size");
10562 
10563   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10564                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
10565                          OutputBecomesInput);
10566 }
10567 
10568 /// Mangle the parameter part of the vector function name according to
10569 /// their OpenMP classification. The mangling function is defined in
10570 /// section 3.5 of the AAVFABI.
10571 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10572   SmallString<256> Buffer;
10573   llvm::raw_svector_ostream Out(Buffer);
10574   for (const auto &ParamAttr : ParamAttrs) {
10575     switch (ParamAttr.Kind) {
10576     case LinearWithVarStride:
10577       Out << "ls" << ParamAttr.StrideOrArg;
10578       break;
10579     case Linear:
10580       Out << 'l';
10581       // Don't print the step value if it is not present or if it is
10582       // equal to 1.
10583       if (ParamAttr.StrideOrArg != 1)
10584         Out << ParamAttr.StrideOrArg;
10585       break;
10586     case Uniform:
10587       Out << 'u';
10588       break;
10589     case Vector:
10590       Out << 'v';
10591       break;
10592     }
10593 
10594     if (!!ParamAttr.Alignment)
10595       Out << 'a' << ParamAttr.Alignment;
10596   }
10597 
10598   return std::string(Out.str());
10599 }
10600 
10601 // Function used to add the attribute. The parameter `VLEN` is
10602 // templated to allow the use of "x" when targeting scalable functions
10603 // for SVE.
10604 template <typename T>
10605 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10606                                  char ISA, StringRef ParSeq,
10607                                  StringRef MangledName, bool OutputBecomesInput,
10608                                  llvm::Function *Fn) {
10609   SmallString<256> Buffer;
10610   llvm::raw_svector_ostream Out(Buffer);
10611   Out << Prefix << ISA << LMask << VLEN;
10612   if (OutputBecomesInput)
10613     Out << "v";
10614   Out << ParSeq << "_" << MangledName;
10615   Fn->addFnAttr(Out.str());
10616 }
10617 
10618 // Helper function to generate the Advanced SIMD names depending on
10619 // the value of the NDS when simdlen is not present.
10620 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10621                                       StringRef Prefix, char ISA,
10622                                       StringRef ParSeq, StringRef MangledName,
10623                                       bool OutputBecomesInput,
10624                                       llvm::Function *Fn) {
10625   switch (NDS) {
10626   case 8:
10627     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10628                          OutputBecomesInput, Fn);
10629     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10630                          OutputBecomesInput, Fn);
10631     break;
10632   case 16:
10633     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10634                          OutputBecomesInput, Fn);
10635     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10636                          OutputBecomesInput, Fn);
10637     break;
10638   case 32:
10639     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10640                          OutputBecomesInput, Fn);
10641     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10642                          OutputBecomesInput, Fn);
10643     break;
10644   case 64:
10645   case 128:
10646     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10647                          OutputBecomesInput, Fn);
10648     break;
10649   default:
10650     llvm_unreachable("Scalar type is too wide.");
10651   }
10652 }
10653 
10654 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
10655 static void emitAArch64DeclareSimdFunction(
10656     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
10657     ArrayRef<ParamAttrTy> ParamAttrs,
10658     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
10659     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
10660 
10661   // Get basic data for building the vector signature.
10662   const auto Data = getNDSWDS(FD, ParamAttrs);
10663   const unsigned NDS = std::get<0>(Data);
10664   const unsigned WDS = std::get<1>(Data);
10665   const bool OutputBecomesInput = std::get<2>(Data);
10666 
10667   // Check the values provided via `simdlen` by the user.
10668   // 1. A `simdlen(1)` doesn't produce vector signatures,
10669   if (UserVLEN == 1) {
10670     unsigned DiagID = CGM.getDiags().getCustomDiagID(
10671         DiagnosticsEngine::Warning,
10672         "The clause simdlen(1) has no effect when targeting aarch64.");
10673     CGM.getDiags().Report(SLoc, DiagID);
10674     return;
10675   }
10676 
10677   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
10678   // Advanced SIMD output.
10679   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
10680     unsigned DiagID = CGM.getDiags().getCustomDiagID(
10681         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
10682                                     "power of 2 when targeting Advanced SIMD.");
10683     CGM.getDiags().Report(SLoc, DiagID);
10684     return;
10685   }
10686 
10687   // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
10688   // limits.
10689   if (ISA == 's' && UserVLEN != 0) {
10690     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
10691       unsigned DiagID = CGM.getDiags().getCustomDiagID(
10692           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
10693                                       "lanes in the architectural constraints "
10694                                       "for SVE (min is 128-bit, max is "
10695                                       "2048-bit, by steps of 128-bit)");
10696       CGM.getDiags().Report(SLoc, DiagID) << WDS;
10697       return;
10698     }
10699   }
10700 
10701   // Sort out parameter sequence.
10702   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
10703   StringRef Prefix = "_ZGV";
10704   // Generate simdlen from user input (if any).
10705   if (UserVLEN) {
10706     if (ISA == 's') {
10707       // SVE generates only a masked function.
10708       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10709                            OutputBecomesInput, Fn);
10710     } else {
10711       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10712       // Advanced SIMD generates one or two functions, depending on
10713       // the `[not]inbranch` clause.
10714       switch (State) {
10715       case OMPDeclareSimdDeclAttr::BS_Undefined:
10716         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10717                              OutputBecomesInput, Fn);
10718         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10719                              OutputBecomesInput, Fn);
10720         break;
10721       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10722         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10723                              OutputBecomesInput, Fn);
10724         break;
10725       case OMPDeclareSimdDeclAttr::BS_Inbranch:
10726         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10727                              OutputBecomesInput, Fn);
10728         break;
10729       }
10730     }
10731   } else {
10732     // If no user simdlen is provided, follow the AAVFABI rules for
10733     // generating the vector length.
10734     if (ISA == 's') {
10735       // SVE, section 3.4.1, item 1.
10736       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
10737                            OutputBecomesInput, Fn);
10738     } else {
10739       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10740       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
10741       // two vector names depending on the use of the clause
10742       // `[not]inbranch`.
10743       switch (State) {
10744       case OMPDeclareSimdDeclAttr::BS_Undefined:
10745         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10746                                   OutputBecomesInput, Fn);
10747         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10748                                   OutputBecomesInput, Fn);
10749         break;
10750       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10751         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10752                                   OutputBecomesInput, Fn);
10753         break;
10754       case OMPDeclareSimdDeclAttr::BS_Inbranch:
10755         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10756                                   OutputBecomesInput, Fn);
10757         break;
10758       }
10759     }
10760   }
10761 }
10762 
10763 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
10764                                               llvm::Function *Fn) {
10765   ASTContext &C = CGM.getContext();
10766   FD = FD->getMostRecentDecl();
10767   // Map params to their positions in function decl.
10768   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
10769   if (isa<CXXMethodDecl>(FD))
10770     ParamPositions.try_emplace(FD, 0);
10771   unsigned ParamPos = ParamPositions.size();
10772   for (const ParmVarDecl *P : FD->parameters()) {
10773     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
10774     ++ParamPos;
10775   }
10776   while (FD) {
10777     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
10778       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
10779       // Mark uniform parameters.
10780       for (const Expr *E : Attr->uniforms()) {
10781         E = E->IgnoreParenImpCasts();
10782         unsigned Pos;
10783         if (isa<CXXThisExpr>(E)) {
10784           Pos = ParamPositions[FD];
10785         } else {
10786           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10787                                 ->getCanonicalDecl();
10788           Pos = ParamPositions[PVD];
10789         }
10790         ParamAttrs[Pos].Kind = Uniform;
10791       }
10792       // Get alignment info.
10793       auto NI = Attr->alignments_begin();
10794       for (const Expr *E : Attr->aligneds()) {
10795         E = E->IgnoreParenImpCasts();
10796         unsigned Pos;
10797         QualType ParmTy;
10798         if (isa<CXXThisExpr>(E)) {
10799           Pos = ParamPositions[FD];
10800           ParmTy = E->getType();
10801         } else {
10802           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10803                                 ->getCanonicalDecl();
10804           Pos = ParamPositions[PVD];
10805           ParmTy = PVD->getType();
10806         }
10807         ParamAttrs[Pos].Alignment =
10808             (*NI)
10809                 ? (*NI)->EvaluateKnownConstInt(C)
10810                 : llvm::APSInt::getUnsigned(
10811                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
10812                           .getQuantity());
10813         ++NI;
10814       }
10815       // Mark linear parameters.
10816       auto SI = Attr->steps_begin();
10817       auto MI = Attr->modifiers_begin();
10818       for (const Expr *E : Attr->linears()) {
10819         E = E->IgnoreParenImpCasts();
10820         unsigned Pos;
10821         // Rescaling factor needed to compute the linear parameter
10822         // value in the mangled name.
10823         unsigned PtrRescalingFactor = 1;
10824         if (isa<CXXThisExpr>(E)) {
10825           Pos = ParamPositions[FD];
10826         } else {
10827           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10828                                 ->getCanonicalDecl();
10829           Pos = ParamPositions[PVD];
10830           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
10831             PtrRescalingFactor = CGM.getContext()
10832                                      .getTypeSizeInChars(P->getPointeeType())
10833                                      .getQuantity();
10834         }
10835         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
10836         ParamAttr.Kind = Linear;
10837         // Assuming a stride of 1, for `linear` without modifiers.
10838         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
10839         if (*SI) {
10840           Expr::EvalResult Result;
10841           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
10842             if (const auto *DRE =
10843                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
10844               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
10845                 ParamAttr.Kind = LinearWithVarStride;
10846                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
10847                     ParamPositions[StridePVD->getCanonicalDecl()]);
10848               }
10849             }
10850           } else {
10851             ParamAttr.StrideOrArg = Result.Val.getInt();
10852           }
10853         }
10854         // If we are using a linear clause on a pointer, we need to
10855         // rescale the value of linear_step with the byte size of the
10856         // pointee type.
10857         if (Linear == ParamAttr.Kind)
10858           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
10859         ++SI;
10860         ++MI;
10861       }
10862       llvm::APSInt VLENVal;
10863       SourceLocation ExprLoc;
10864       const Expr *VLENExpr = Attr->getSimdlen();
10865       if (VLENExpr) {
10866         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
10867         ExprLoc = VLENExpr->getExprLoc();
10868       }
10869       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
10870       if (CGM.getTriple().isX86()) {
10871         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
10872       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
10873         unsigned VLEN = VLENVal.getExtValue();
10874         StringRef MangledName = Fn->getName();
10875         if (CGM.getTarget().hasFeature("sve"))
10876           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10877                                          MangledName, 's', 128, Fn, ExprLoc);
10878         if (CGM.getTarget().hasFeature("neon"))
10879           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10880                                          MangledName, 'n', 128, Fn, ExprLoc);
10881       }
10882     }
10883     FD = FD->getPreviousDecl();
10884   }
10885 }
10886 
10887 namespace {
10888 /// Cleanup action for doacross support.
10889 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
10890 public:
10891   static const int DoacrossFinArgs = 2;
10892 
10893 private:
10894   llvm::FunctionCallee RTLFn;
10895   llvm::Value *Args[DoacrossFinArgs];
10896 
10897 public:
10898   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
10899                     ArrayRef<llvm::Value *> CallArgs)
10900       : RTLFn(RTLFn) {
10901     assert(CallArgs.size() == DoacrossFinArgs);
10902     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10903   }
10904   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
10905     if (!CGF.HaveInsertPoint())
10906       return;
10907     CGF.EmitRuntimeCall(RTLFn, Args);
10908   }
10909 };
10910 } // namespace
10911 
// Emits the initialization for a doacross loop nest: builds an array of
// kmp_dim (one per collapsed loop), fills each entry's upper bound and
// stride, calls __kmpc_doacross_init, and pushes a cleanup that emits the
// matching __kmpc_doacross_fini at region exit.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // Build (once, then cache in KmpDimTy) the record the runtime expects.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  // Zero-initialize the whole array; the lower bound ('lo') fields stay 0.
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    // Convert the iteration count to kmp_int64 as required by the runtime.
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn =
      llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Push the __kmpc_doacross_fini cleanup; it runs on both normal and EH
  // exits from the region.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn =
      llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
10984 
10985 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
10986                                           const OMPDependClause *C) {
10987   QualType Int64Ty =
10988       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
10989   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
10990   QualType ArrayTy = CGM.getContext().getConstantArrayType(
10991       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
10992   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
10993   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
10994     const Expr *CounterVal = C->getLoopData(I);
10995     assert(CounterVal);
10996     llvm::Value *CntVal = CGF.EmitScalarConversion(
10997         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
10998         CounterVal->getExprLoc());
10999     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11000                           /*Volatile=*/false, Int64Ty);
11001   }
11002   llvm::Value *Args[] = {
11003       emitUpdateLocation(CGF, C->getBeginLoc()),
11004       getThreadID(CGF, C->getBeginLoc()),
11005       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
11006   llvm::FunctionCallee RTLFn;
11007   if (C->getDependencyKind() == OMPC_DEPEND_source) {
11008     RTLFn = llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
11009         CGM.getModule(), OMPRTL___kmpc_doacross_post);
11010   } else {
11011     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
11012     RTLFn = llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
11013         CGM.getModule(), OMPRTL___kmpc_doacross_wait);
11014   }
11015   CGF.EmitRuntimeCall(RTLFn, Args);
11016 }
11017 
11018 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11019                                llvm::FunctionCallee Callee,
11020                                ArrayRef<llvm::Value *> Args) const {
11021   assert(Loc.isValid() && "Outlined function call location must be valid.");
11022   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11023 
11024   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11025     if (Fn->doesNotThrow()) {
11026       CGF.EmitNounwindRuntimeCall(Fn, Args);
11027       return;
11028     }
11029   }
11030   CGF.EmitRuntimeCall(Callee, Args);
11031 }
11032 
// Emits a call to the outlined function \p OutlinedFn with \p Args at \p Loc;
// this host implementation simply forwards to emitCall.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
11038 
11039 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11040   if (const auto *FD = dyn_cast<FunctionDecl>(D))
11041     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11042       HasEmittedDeclareTargetRegion = true;
11043 }
11044 
// Returns the address of the local variable for \p NativeParam. This default
// implementation performs no host/target remapping, so \p TargetParam is
// ignored.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
11050 
11051 namespace {
11052 /// Cleanup action for allocate support.
11053 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
11054 public:
11055   static const int CleanupArgs = 3;
11056 
11057 private:
11058   llvm::FunctionCallee RTLFn;
11059   llvm::Value *Args[CleanupArgs];
11060 
11061 public:
11062   OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
11063                        ArrayRef<llvm::Value *> CallArgs)
11064       : RTLFn(RTLFn) {
11065     assert(CallArgs.size() == CleanupArgs &&
11066            "Size of arguments does not match.");
11067     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11068   }
11069   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11070     if (!CGF.HaveInsertPoint())
11071       return;
11072     CGF.EmitRuntimeCall(RTLFn, Args);
11073   }
11074 };
11075 } // namespace
11076 
// Returns storage for a local variable marked with 'omp allocate': memory is
// obtained through __kmpc_alloc with the clause's allocator, and a matching
// __kmpc_free cleanup is pushed. Returns an invalid Address (letting normal
// alloca-based emission proceed) when no custom allocation is needed.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return Address::invalid();
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  if ((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
       AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
      !AA->getAllocator())
    return Address::invalid();
  llvm::Value *Size;
  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
  if (CVD->getType()->isVariablyModifiedType()) {
    // VLA-like types: size is a runtime value; round it up to the alignment.
    Size = CGF.getTypeSize(CVD->getType());
    // Align the size: ((size + align - 1) / align) * align
    Size = CGF.Builder.CreateNUWAdd(
        Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
  } else {
    // Statically-sized types: compute the aligned size at compile time.
    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
    Size = CGM.getSize(Sz.alignTo(Align));
  }
  llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
  assert(AA->getAllocator() &&
         "Expected allocator expression for non-default allocator.");
  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
  // According to the standard, the original allocator type is a enum (integer).
  // Convert to pointer type, if required.
  if (Allocator->getType()->isIntegerTy())
    Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
  else if (Allocator->getType()->isPointerTy())
    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
                                                                CGM.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  // void *<name>.void.addr = __kmpc_alloc(gtid, size, allocator);
  llvm::Value *Addr =
      CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, getName({CVD->getName(), ".void.addr"}));
  // Push the __kmpc_free cleanup with the same thread-id and allocator.
  llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
                                                              Allocator};
  llvm::FunctionCallee FiniRTLFn =
      llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(CGM.getModule(),
                                                        OMPRTL___kmpc_free);

  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                                llvm::makeArrayRef(FiniArgs));
  // Cast the raw allocation to a pointer to the variable's type.
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr,
      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
      getName({CVD->getName(), ".addr"}));
  return Address(Addr, Align);
}
11134 
11135 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11136     CodeGenModule &CGM, const OMPLoopDirective &S)
11137     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11138   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11139   if (!NeedToPush)
11140     return;
11141   NontemporalDeclsSet &DS =
11142       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11143   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11144     for (const Stmt *Ref : C->private_refs()) {
11145       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11146       const ValueDecl *VD;
11147       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11148         VD = DRE->getDecl();
11149       } else {
11150         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11151         assert((ME->isImplicitCXXThis() ||
11152                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11153                "Expected member of current class.");
11154         VD = ME->getMemberDecl();
11155       }
11156       DS.insert(VD);
11157     }
11158   }
11159 }
11160 
11161 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11162   if (!NeedToPush)
11163     return;
11164   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11165 }
11166 
11167 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11168   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11169 
11170   return llvm::any_of(
11171       CGM.getOpenMPRuntime().NontemporalDeclsStack,
11172       [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
11173 }
11174 
11175 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11176     const OMPExecutableDirective &S,
11177     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11178     const {
11179   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11180   // Vars in target/task regions must be excluded completely.
11181   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11182       isOpenMPTaskingDirective(S.getDirectiveKind())) {
11183     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11184     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11185     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11186     for (const CapturedStmt::Capture &Cap : CS->captures()) {
11187       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11188         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11189     }
11190   }
11191   // Exclude vars in private clauses.
11192   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11193     for (const Expr *Ref : C->varlists()) {
11194       if (!Ref->getType()->isScalarType())
11195         continue;
11196       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11197       if (!DRE)
11198         continue;
11199       NeedToCheckForLPCs.insert(DRE->getDecl());
11200     }
11201   }
11202   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11203     for (const Expr *Ref : C->varlists()) {
11204       if (!Ref->getType()->isScalarType())
11205         continue;
11206       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11207       if (!DRE)
11208         continue;
11209       NeedToCheckForLPCs.insert(DRE->getDecl());
11210     }
11211   }
11212   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11213     for (const Expr *Ref : C->varlists()) {
11214       if (!Ref->getType()->isScalarType())
11215         continue;
11216       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11217       if (!DRE)
11218         continue;
11219       NeedToCheckForLPCs.insert(DRE->getDecl());
11220     }
11221   }
11222   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11223     for (const Expr *Ref : C->varlists()) {
11224       if (!Ref->getType()->isScalarType())
11225         continue;
11226       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11227       if (!DRE)
11228         continue;
11229       NeedToCheckForLPCs.insert(DRE->getDecl());
11230     }
11231   }
11232   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11233     for (const Expr *Ref : C->varlists()) {
11234       if (!Ref->getType()->isScalarType())
11235         continue;
11236       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11237       if (!DRE)
11238         continue;
11239       NeedToCheckForLPCs.insert(DRE->getDecl());
11240     }
11241   }
11242   for (const Decl *VD : NeedToCheckForLPCs) {
11243     for (const LastprivateConditionalData &Data :
11244          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
11245       if (Data.DeclToUniqueName.count(VD) > 0) {
11246         if (!Data.Disabled)
11247           NeedToAddForLPCsAsDisabled.insert(VD);
11248         break;
11249       }
11250     }
11251   }
11252 }
11253 
// Pushes a lastprivate-conditional tracking entry for directive \p S when it
// has a lastprivate clause with the 'conditional' modifier (OpenMP >= 5.0);
// the destructor pops it.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      // Push only for OpenMP >= 5.0 and only if some lastprivate clause
      // carries the 'conditional' modifier.
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  // Record every conditional-lastprivate decl together with a unique name
  // generated from the reference expression.
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  // Remember the iteration-variable lvalue and the emitting function for the
  // entry.
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}
11285 
11286 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11287     CodeGenFunction &CGF, const OMPExecutableDirective &S)
11288     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
11289   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11290   if (CGM.getLangOpts().OpenMP < 50)
11291     return;
11292   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
11293   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
11294   if (!NeedToAddForLPCsAsDisabled.empty()) {
11295     Action = ActionToDo::DisableLastprivateConditional;
11296     LastprivateConditionalData &Data =
11297         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11298     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
11299       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
11300     Data.Fn = CGF.CurFn;
11301     Data.Disabled = true;
11302   }
11303 }
11304 
// Named factory for the disabling constructor: returns an RAII object that
// turns off lastprivate-conditional analysis inside \p S for its lifetime.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
11310 
11311 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
11312   if (CGM.getLangOpts().OpenMP < 50)
11313     return;
11314   if (Action == ActionToDo::DisableLastprivateConditional) {
11315     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11316            "Expected list of disabled private vars.");
11317     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11318   }
11319   if (Action == ActionToDo::PushAsLastprivateConditional) {
11320     assert(
11321         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11322         "Expected list of lastprivate conditional vars.");
11323     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11324   }
11325 }
11326 
// Creates (or reuses, cached per function and decl) a private aggregate
// { <VD's type> value; char Fired; } for a conditional lastprivate variable,
// resets the Fired flag to 0, and returns the address of the value field.
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // First use of VD in this function: build the record type, allocate the
    // temporary and cache everything for subsequent calls.
    // (The record name's spelling "lasprivate" is kept as-is — it is an
    // implementation-internal identifier.)
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    // Cached entry: unpack (type, value field, fired field, base lvalue).
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // Fired = 0;
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}
11361 
11362 namespace {
11363 /// Checks if the lastprivate conditional variable is referenced in LHS.
11364 class LastprivateConditionalRefChecker final
11365     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
11366   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
11367   const Expr *FoundE = nullptr;
11368   const Decl *FoundD = nullptr;
11369   StringRef UniqueDeclName;
11370   LValue IVLVal;
11371   llvm::Function *FoundFn = nullptr;
11372   SourceLocation Loc;
11373 
11374 public:
11375   bool VisitDeclRefExpr(const DeclRefExpr *E) {
11376     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11377          llvm::reverse(LPM)) {
11378       auto It = D.DeclToUniqueName.find(E->getDecl());
11379       if (It == D.DeclToUniqueName.end())
11380         continue;
11381       if (D.Disabled)
11382         return false;
11383       FoundE = E;
11384       FoundD = E->getDecl()->getCanonicalDecl();
11385       UniqueDeclName = It->second;
11386       IVLVal = D.IVLVal;
11387       FoundFn = D.Fn;
11388       break;
11389     }
11390     return FoundE == E;
11391   }
11392   bool VisitMemberExpr(const MemberExpr *E) {
11393     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
11394       return false;
11395     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11396          llvm::reverse(LPM)) {
11397       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
11398       if (It == D.DeclToUniqueName.end())
11399         continue;
11400       if (D.Disabled)
11401         return false;
11402       FoundE = E;
11403       FoundD = E->getMemberDecl()->getCanonicalDecl();
11404       UniqueDeclName = It->second;
11405       IVLVal = D.IVLVal;
11406       FoundFn = D.Fn;
11407       break;
11408     }
11409     return FoundE == E;
11410   }
11411   bool VisitStmt(const Stmt *S) {
11412     for (const Stmt *Child : S->children()) {
11413       if (!Child)
11414         continue;
11415       if (const auto *E = dyn_cast<Expr>(Child))
11416         if (!E->isGLValue())
11417           continue;
11418       if (Visit(Child))
11419         return true;
11420     }
11421     return false;
11422   }
11423   explicit LastprivateConditionalRefChecker(
11424       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
11425       : LPM(LPM) {}
11426   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
11427   getFoundData() const {
11428     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
11429   }
11430 };
11431 } // namespace
11432 
/// Emits the "remember the last value" sequence for a lastprivate conditional
/// variable. Two internal globals are kept per unique variable name: the
/// highest loop-iteration value seen so far and the variable value stored at
/// that iteration. The pair is refreshed when the current iteration is >= the
/// recorded one, under a critical section named after the variable (or
/// unguarded in -fopenmp-simd mode, where no parallel region can be emitted).
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var. Note the comparison is '<=', so on equal
    // iterations the thread arriving last in the critical section wins.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
11519 
11520 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
11521                                                          const Expr *LHS) {
11522   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11523     return;
11524   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
11525   if (!Checker.Visit(LHS))
11526     return;
11527   const Expr *FoundE;
11528   const Decl *FoundD;
11529   StringRef UniqueDeclName;
11530   LValue IVLVal;
11531   llvm::Function *FoundFn;
11532   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
11533       Checker.getFoundData();
11534   if (FoundFn != CGF.CurFn) {
11535     // Special codegen for inner parallel regions.
11536     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
11537     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
11538     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
11539            "Lastprivate conditional is not found in outer region.");
11540     QualType StructTy = std::get<0>(It->getSecond());
11541     const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
11542     LValue PrivLVal = CGF.EmitLValue(FoundE);
11543     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11544         PrivLVal.getAddress(CGF),
11545         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
11546     LValue BaseLVal =
11547         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
11548     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
11549     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
11550                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
11551                         FiredLVal, llvm::AtomicOrdering::Unordered,
11552                         /*IsVolatile=*/true, /*isInit=*/false);
11553     return;
11554   }
11555 
11556   // Private address of the lastprivate conditional in the current context.
11557   // priv_a
11558   LValue LVal = CGF.EmitLValue(FoundE);
11559   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
11560                                    FoundE->getExprLoc());
11561 }
11562 
11563 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
11564     CodeGenFunction &CGF, const OMPExecutableDirective &D,
11565     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
11566   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11567     return;
11568   auto Range = llvm::reverse(LastprivateConditionalStack);
11569   auto It = llvm::find_if(
11570       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
11571   if (It == Range.end() || It->Fn != CGF.CurFn)
11572     return;
11573   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
11574   assert(LPCI != LastprivateConditionalToTypes.end() &&
11575          "Lastprivates must be registered already.");
11576   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11577   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
11578   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
11579   for (const auto &Pair : It->DeclToUniqueName) {
11580     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
11581     if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
11582       continue;
11583     auto I = LPCI->getSecond().find(Pair.first);
11584     assert(I != LPCI->getSecond().end() &&
11585            "Lastprivate must be rehistered already.");
11586     // bool Cmp = priv_a.Fired != 0;
11587     LValue BaseLVal = std::get<3>(I->getSecond());
11588     LValue FiredLVal =
11589         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
11590     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
11591     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
11592     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
11593     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
11594     // if (Cmp) {
11595     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
11596     CGF.EmitBlock(ThenBB);
11597     Address Addr = CGF.GetAddrOfLocalVar(VD);
11598     LValue LVal;
11599     if (VD->getType()->isReferenceType())
11600       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
11601                                            AlignmentSource::Decl);
11602     else
11603       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
11604                                 AlignmentSource::Decl);
11605     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
11606                                      D.getBeginLoc());
11607     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
11608     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
11609     // }
11610   }
11611 }
11612 
11613 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
11614     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
11615     SourceLocation Loc) {
11616   if (CGF.getLangOpts().OpenMP < 50)
11617     return;
11618   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
11619   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
11620          "Unknown lastprivate conditional variable.");
11621   StringRef UniqueName = It->second;
11622   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
11623   // The variable was not updated in the region - exit.
11624   if (!GV)
11625     return;
11626   LValue LPLVal = CGF.MakeAddrLValue(
11627       GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
11628   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
11629   CGF.EmitStoreOfScalar(Res, PrivLVal);
11630 }
11631 
// CGOpenMPSIMDRuntime stubs: in SIMD-only OpenMP mode no libomp runtime calls
// may be generated, so the outlining and parallel-call hooks below must never
// be reached.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11659 
// Synchronization and worksharing-loop entry points: unreachable stubs for
// SIMD-only OpenMP mode, where no runtime-backed constructs are emitted.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11758 
// Threadprivate, tasking and reduction entry points for SIMD-only mode: all
// unreachable except emitReduction, which can be handled without the runtime.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// In SIMD-only mode only "simple" (runtime-free) reductions are allowed;
// those are fully handled by the base-class implementation.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11839 
// Cancellation, target/teams offloading and doacross entry points for
// SIMD-only mode: all unreachable, except emitTargetGlobal which simply
// reports that it did not handle the global.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// Returning false tells the caller the global was not consumed by the
// OpenMP runtime and must go through regular codegen.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11931