1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/Attr.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/OpenMPClause.h"
21 #include "clang/AST/StmtOpenMP.h"
22 #include "clang/AST/StmtVisitor.h"
23 #include "clang/Basic/BitmaskEnum.h"
24 #include "clang/Basic/FileManager.h"
25 #include "clang/Basic/OpenMPKinds.h"
26 #include "clang/Basic/SourceManager.h"
27 #include "clang/CodeGen/ConstantInitBuilder.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/SetOperations.h"
30 #include "llvm/ADT/StringExtras.h"
31 #include "llvm/Bitcode/BitcodeReader.h"
32 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
33 #include "llvm/IR/Constants.h"
34 #include "llvm/IR/DerivedTypes.h"
35 #include "llvm/IR/GlobalValue.h"
36 #include "llvm/IR/Value.h"
37 #include "llvm/Support/AtomicOrdering.h"
38 #include "llvm/Support/Format.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include <cassert>
41 #include <numeric>
42 
43 using namespace clang;
44 using namespace CodeGen;
45 using namespace llvm::omp;
46 
47 namespace {
48 /// Base class for handling code generation inside OpenMP regions.
49 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
50 public:
51   /// Kinds of OpenMP regions used in codegen.
52   enum CGOpenMPRegionKind {
53     /// Region with outlined function for standalone 'parallel'
54     /// directive.
55     ParallelOutlinedRegion,
56     /// Region with outlined function for standalone 'task' directive.
57     TaskOutlinedRegion,
58     /// Region for constructs that do not require function outlining,
59     /// like 'for', 'sections', 'atomic' etc. directives.
60     InlinedRegion,
61     /// Region with outlined function for standalone 'target' directive.
62     TargetRegion,
63   };
64 
65   CGOpenMPRegionInfo(const CapturedStmt &CS,
66                      const CGOpenMPRegionKind RegionKind,
67                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
68                      bool HasCancel)
69       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
70         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
71 
72   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
73                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
74                      bool HasCancel)
75       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
76         Kind(Kind), HasCancel(HasCancel) {}
77 
78   /// Get a variable or parameter for storing global thread id
79   /// inside OpenMP construct.
80   virtual const VarDecl *getThreadIDVariable() const = 0;
81 
82   /// Emit the captured statement body.
83   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
84 
85   /// Get an LValue for the current ThreadID variable.
86   /// \return LValue for thread id variable. This LValue always has type int32*.
87   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
88 
89   virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
90 
91   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
92 
93   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
94 
95   bool hasCancel() const { return HasCancel; }
96 
97   static bool classof(const CGCapturedStmtInfo *Info) {
98     return Info->getKind() == CR_OpenMP;
99   }
100 
101   ~CGOpenMPRegionInfo() override = default;
102 
103 protected:
104   CGOpenMPRegionKind RegionKind;
105   RegionCodeGenTy CodeGen;
106   OpenMPDirectiveKind Kind;
107   bool HasCancel;
108 };
109 
110 /// API for captured statement code generation in OpenMP constructs.
111 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
112 public:
113   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
114                              const RegionCodeGenTy &CodeGen,
115                              OpenMPDirectiveKind Kind, bool HasCancel,
116                              StringRef HelperName)
117       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
118                            HasCancel),
119         ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
120     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
121   }
122 
123   /// Get a variable or parameter for storing global thread id
124   /// inside OpenMP construct.
125   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
126 
127   /// Get the name of the capture helper.
128   StringRef getHelperName() const override { return HelperName; }
129 
130   static bool classof(const CGCapturedStmtInfo *Info) {
131     return CGOpenMPRegionInfo::classof(Info) &&
132            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
133                ParallelOutlinedRegion;
134   }
135 
136 private:
137   /// A variable or parameter storing global thread id for OpenMP
138   /// constructs.
139   const VarDecl *ThreadIDVar;
140   StringRef HelperName;
141 };
142 
143 /// API for captured statement code generation in OpenMP constructs.
144 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
145 public:
146   class UntiedTaskActionTy final : public PrePostActionTy {
147     bool Untied;
148     const VarDecl *PartIDVar;
149     const RegionCodeGenTy UntiedCodeGen;
150     llvm::SwitchInst *UntiedSwitch = nullptr;
151 
152   public:
153     UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
154                        const RegionCodeGenTy &UntiedCodeGen)
155         : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
156     void Enter(CodeGenFunction &CGF) override {
157       if (Untied) {
158         // Emit task switching point.
159         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
160             CGF.GetAddrOfLocalVar(PartIDVar),
161             PartIDVar->getType()->castAs<PointerType>());
162         llvm::Value *Res =
163             CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
164         llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
165         UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
166         CGF.EmitBlock(DoneBB);
167         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
168         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
169         UntiedSwitch->addCase(CGF.Builder.getInt32(0),
170                               CGF.Builder.GetInsertBlock());
171         emitUntiedSwitch(CGF);
172       }
173     }
174     void emitUntiedSwitch(CodeGenFunction &CGF) const {
175       if (Untied) {
176         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
177             CGF.GetAddrOfLocalVar(PartIDVar),
178             PartIDVar->getType()->castAs<PointerType>());
179         CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
180                               PartIdLVal);
181         UntiedCodeGen(CGF);
182         CodeGenFunction::JumpDest CurPoint =
183             CGF.getJumpDestInCurrentScope(".untied.next.");
184         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
185         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
186         UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
187                               CGF.Builder.GetInsertBlock());
188         CGF.EmitBranchThroughCleanup(CurPoint);
189         CGF.EmitBlock(CurPoint.getBlock());
190       }
191     }
192     unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
193   };
194   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
195                                  const VarDecl *ThreadIDVar,
196                                  const RegionCodeGenTy &CodeGen,
197                                  OpenMPDirectiveKind Kind, bool HasCancel,
198                                  const UntiedTaskActionTy &Action)
199       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
200         ThreadIDVar(ThreadIDVar), Action(Action) {
201     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
202   }
203 
204   /// Get a variable or parameter for storing global thread id
205   /// inside OpenMP construct.
206   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
207 
208   /// Get an LValue for the current ThreadID variable.
209   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
210 
211   /// Get the name of the capture helper.
212   StringRef getHelperName() const override { return ".omp_outlined."; }
213 
214   void emitUntiedSwitch(CodeGenFunction &CGF) override {
215     Action.emitUntiedSwitch(CGF);
216   }
217 
218   static bool classof(const CGCapturedStmtInfo *Info) {
219     return CGOpenMPRegionInfo::classof(Info) &&
220            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
221                TaskOutlinedRegion;
222   }
223 
224 private:
225   /// A variable or parameter storing global thread id for OpenMP
226   /// constructs.
227   const VarDecl *ThreadIDVar;
228   /// Action for emitting code for untied tasks.
229   const UntiedTaskActionTy &Action;
230 };
231 
232 /// API for inlined captured statement code generation in OpenMP
233 /// constructs.
234 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
235 public:
236   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
237                             const RegionCodeGenTy &CodeGen,
238                             OpenMPDirectiveKind Kind, bool HasCancel)
239       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
240         OldCSI(OldCSI),
241         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
242 
243   // Retrieve the value of the context parameter.
244   llvm::Value *getContextValue() const override {
245     if (OuterRegionInfo)
246       return OuterRegionInfo->getContextValue();
247     llvm_unreachable("No context value for inlined OpenMP region");
248   }
249 
250   void setContextValue(llvm::Value *V) override {
251     if (OuterRegionInfo) {
252       OuterRegionInfo->setContextValue(V);
253       return;
254     }
255     llvm_unreachable("No context value for inlined OpenMP region");
256   }
257 
258   /// Lookup the captured field decl for a variable.
259   const FieldDecl *lookup(const VarDecl *VD) const override {
260     if (OuterRegionInfo)
261       return OuterRegionInfo->lookup(VD);
262     // If there is no outer outlined region,no need to lookup in a list of
263     // captured variables, we can use the original one.
264     return nullptr;
265   }
266 
267   FieldDecl *getThisFieldDecl() const override {
268     if (OuterRegionInfo)
269       return OuterRegionInfo->getThisFieldDecl();
270     return nullptr;
271   }
272 
273   /// Get a variable or parameter for storing global thread id
274   /// inside OpenMP construct.
275   const VarDecl *getThreadIDVariable() const override {
276     if (OuterRegionInfo)
277       return OuterRegionInfo->getThreadIDVariable();
278     return nullptr;
279   }
280 
281   /// Get an LValue for the current ThreadID variable.
282   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
283     if (OuterRegionInfo)
284       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
285     llvm_unreachable("No LValue for inlined OpenMP construct");
286   }
287 
288   /// Get the name of the capture helper.
289   StringRef getHelperName() const override {
290     if (auto *OuterRegionInfo = getOldCSI())
291       return OuterRegionInfo->getHelperName();
292     llvm_unreachable("No helper name for inlined OpenMP construct");
293   }
294 
295   void emitUntiedSwitch(CodeGenFunction &CGF) override {
296     if (OuterRegionInfo)
297       OuterRegionInfo->emitUntiedSwitch(CGF);
298   }
299 
300   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
301 
302   static bool classof(const CGCapturedStmtInfo *Info) {
303     return CGOpenMPRegionInfo::classof(Info) &&
304            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
305   }
306 
307   ~CGOpenMPInlinedRegionInfo() override = default;
308 
309 private:
310   /// CodeGen info about outer OpenMP region.
311   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
312   CGOpenMPRegionInfo *OuterRegionInfo;
313 };
314 
315 /// API for captured statement code generation in OpenMP target
316 /// constructs. For this captures, implicit parameters are used instead of the
317 /// captured fields. The name of the target region has to be unique in a given
318 /// application so it is provided by the client, because only the client has
319 /// the information to generate that.
320 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
321 public:
322   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
323                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
324       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
325                            /*HasCancel=*/false),
326         HelperName(HelperName) {}
327 
328   /// This is unused for target regions because each starts executing
329   /// with a single thread.
330   const VarDecl *getThreadIDVariable() const override { return nullptr; }
331 
332   /// Get the name of the capture helper.
333   StringRef getHelperName() const override { return HelperName; }
334 
335   static bool classof(const CGCapturedStmtInfo *Info) {
336     return CGOpenMPRegionInfo::classof(Info) &&
337            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
338   }
339 
340 private:
341   StringRef HelperName;
342 };
343 
344 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
345   llvm_unreachable("No codegen for expressions");
346 }
347 /// API for generation of expressions captured in a innermost OpenMP
348 /// region.
349 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
350 public:
351   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
352       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
353                                   OMPD_unknown,
354                                   /*HasCancel=*/false),
355         PrivScope(CGF) {
356     // Make sure the globals captured in the provided statement are local by
357     // using the privatization logic. We assume the same variable is not
358     // captured more than once.
359     for (const auto &C : CS.captures()) {
360       if (!C.capturesVariable() && !C.capturesVariableByCopy())
361         continue;
362 
363       const VarDecl *VD = C.getCapturedVar();
364       if (VD->isLocalVarDeclOrParm())
365         continue;
366 
367       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
368                       /*RefersToEnclosingVariableOrCapture=*/false,
369                       VD->getType().getNonReferenceType(), VK_LValue,
370                       C.getLocation());
371       PrivScope.addPrivate(
372           VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
373     }
374     (void)PrivScope.Privatize();
375   }
376 
377   /// Lookup the captured field decl for a variable.
378   const FieldDecl *lookup(const VarDecl *VD) const override {
379     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
380       return FD;
381     return nullptr;
382   }
383 
384   /// Emit the captured statement body.
385   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
386     llvm_unreachable("No body for expressions");
387   }
388 
389   /// Get a variable or parameter for storing global thread id
390   /// inside OpenMP construct.
391   const VarDecl *getThreadIDVariable() const override {
392     llvm_unreachable("No thread id for expressions");
393   }
394 
395   /// Get the name of the capture helper.
396   StringRef getHelperName() const override {
397     llvm_unreachable("No helper name for expressions");
398   }
399 
400   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
401 
402 private:
403   /// Private scope to capture global variables.
404   CodeGenFunction::OMPPrivateScope PrivScope;
405 };
406 
407 /// RAII for emitting code of OpenMP constructs.
408 class InlinedOpenMPRegionRAII {
409   CodeGenFunction &CGF;
410   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
411   FieldDecl *LambdaThisCaptureField = nullptr;
412   const CodeGen::CGBlockInfo *BlockInfo = nullptr;
413 
414 public:
415   /// Constructs region for combined constructs.
416   /// \param CodeGen Code generation sequence for combined directives. Includes
417   /// a list of functions used for code generation of implicitly inlined
418   /// regions.
419   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
420                           OpenMPDirectiveKind Kind, bool HasCancel)
421       : CGF(CGF) {
422     // Start emission for the construct.
423     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
424         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
425     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
426     LambdaThisCaptureField = CGF.LambdaThisCaptureField;
427     CGF.LambdaThisCaptureField = nullptr;
428     BlockInfo = CGF.BlockInfo;
429     CGF.BlockInfo = nullptr;
430   }
431 
432   ~InlinedOpenMPRegionRAII() {
433     // Restore original CapturedStmtInfo only if we're done with code emission.
434     auto *OldCSI =
435         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
436     delete CGF.CapturedStmtInfo;
437     CGF.CapturedStmtInfo = OldCSI;
438     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
439     CGF.LambdaThisCaptureField = LambdaThisCaptureField;
440     CGF.BlockInfo = BlockInfo;
441   }
442 };
443 
444 /// Values for bit flags used in the ident_t to describe the fields.
445 /// All enumeric elements are named and described in accordance with the code
446 /// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
447 enum OpenMPLocationFlags : unsigned {
448   /// Use trampoline for internal microtask.
449   OMP_IDENT_IMD = 0x01,
450   /// Use c-style ident structure.
451   OMP_IDENT_KMPC = 0x02,
452   /// Atomic reduction option for kmpc_reduce.
453   OMP_ATOMIC_REDUCE = 0x10,
454   /// Explicit 'barrier' directive.
455   OMP_IDENT_BARRIER_EXPL = 0x20,
456   /// Implicit barrier in code.
457   OMP_IDENT_BARRIER_IMPL = 0x40,
458   /// Implicit barrier in 'for' directive.
459   OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
460   /// Implicit barrier in 'sections' directive.
461   OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
462   /// Implicit barrier in 'single' directive.
463   OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
464   /// Call of __kmp_for_static_init for static loop.
465   OMP_IDENT_WORK_LOOP = 0x200,
466   /// Call of __kmp_for_static_init for sections.
467   OMP_IDENT_WORK_SECTIONS = 0x400,
468   /// Call of __kmp_for_static_init for distribute.
469   OMP_IDENT_WORK_DISTRIBUTE = 0x800,
470   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
471 };
472 
473 namespace {
474 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
475 /// Values for bit flags for marking which requires clauses have been used.
476 enum OpenMPOffloadingRequiresDirFlags : int64_t {
477   /// flag undefined.
478   OMP_REQ_UNDEFINED               = 0x000,
479   /// no requires clause present.
480   OMP_REQ_NONE                    = 0x001,
481   /// reverse_offload clause.
482   OMP_REQ_REVERSE_OFFLOAD         = 0x002,
483   /// unified_address clause.
484   OMP_REQ_UNIFIED_ADDRESS         = 0x004,
485   /// unified_shared_memory clause.
486   OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
487   /// dynamic_allocators clause.
488   OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
489   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
490 };
491 
492 enum OpenMPOffloadingReservedDeviceIDs {
493   /// Device ID if the device was not defined, runtime should get it
494   /// from environment variables in the spec.
495   OMP_DEVICEID_UNDEF = -1,
496 };
497 } // anonymous namespace
498 
/// Indices of the fields of the ident structure that describes a source
/// location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// reserved_1: might be used in Fortran.
  IdentField_Reserved_1 = 0,
  /// flags: OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags = 1,
  /// reserved_2: not really used in Fortran any more.
  IdentField_Reserved_2 = 2,
  /// reserved_3: source[4] in Fortran, do not use for C++.
  IdentField_Reserved_3 = 3,
  /// psource: string describing the source location. The string is composed
  /// of semi-colon separated fields which describe the source file, the
  /// function and a pair of line numbers that delimit the construct.
  IdentField_PSource = 4
};
539 
/// Schedule kinds for 'omp for' loops. The enumerators mirror the
/// sched_type enum in kmp.h.
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  /// Unordered static schedule with a chunk size.
  OMP_sch_static_chunked = 33,
  /// Unordered static schedule.
  OMP_sch_static = 34,
  /// Unordered dynamic schedule with a chunk size.
  OMP_sch_dynamic_chunked = 35,
  /// Unordered guided schedule with a chunk size.
  OMP_sch_guided_chunked = 36,
  /// Unordered runtime schedule.
  OMP_sch_runtime = 37,
  /// Unordered auto schedule.
  OMP_sch_auto = 38,
  /// Static with chunk adjustment (e.g., simd).
  OMP_sch_static_balanced_chunked = 45,

  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  /// Ordered static schedule with a chunk size.
  OMP_ord_static_chunked = 65,
  /// Ordered static schedule.
  OMP_ord_static = 66,
  /// Ordered dynamic schedule with a chunk size.
  OMP_ord_dynamic_chunked = 67,
  /// Ordered guided schedule with a chunk size.
  OMP_ord_guided_chunked = 68,
  /// Ordered runtime schedule.
  OMP_ord_runtime = 69,
  /// Ordered auto schedule.
  OMP_ord_auto = 70,

  /// Default schedule: alias of the unordered static schedule.
  OMP_sch_default = OMP_sch_static,

  /// dist_schedule types.
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,

  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = 1 << 29,
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = 1 << 30,
};
571 
572 enum OpenMPRTLFunction {
573   /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
574   /// kmpc_micro microtask, ...);
575   OMPRTL__kmpc_fork_call,
576   /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
577   /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
578   OMPRTL__kmpc_threadprivate_cached,
579   /// Call to void __kmpc_threadprivate_register( ident_t *,
580   /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
581   OMPRTL__kmpc_threadprivate_register,
582   // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc);
583   OMPRTL__kmpc_global_thread_num,
584   // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
585   // kmp_critical_name *crit);
586   OMPRTL__kmpc_critical,
587   // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
588   // global_tid, kmp_critical_name *crit, uintptr_t hint);
589   OMPRTL__kmpc_critical_with_hint,
590   // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
591   // kmp_critical_name *crit);
592   OMPRTL__kmpc_end_critical,
593   // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
594   // global_tid);
595   OMPRTL__kmpc_cancel_barrier,
596   // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
597   OMPRTL__kmpc_barrier,
598   // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
599   OMPRTL__kmpc_for_static_fini,
600   // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
601   // global_tid);
602   OMPRTL__kmpc_serialized_parallel,
603   // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
604   // global_tid);
605   OMPRTL__kmpc_end_serialized_parallel,
606   // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
607   // kmp_int32 num_threads);
608   OMPRTL__kmpc_push_num_threads,
609   // Call to void __kmpc_flush(ident_t *loc);
610   OMPRTL__kmpc_flush,
611   // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
612   OMPRTL__kmpc_master,
613   // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
614   OMPRTL__kmpc_end_master,
615   // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
616   // int end_part);
617   OMPRTL__kmpc_omp_taskyield,
618   // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
619   OMPRTL__kmpc_single,
620   // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
621   OMPRTL__kmpc_end_single,
622   // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
623   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
624   // kmp_routine_entry_t *task_entry);
625   OMPRTL__kmpc_omp_task_alloc,
626   // Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *,
627   // kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
628   // size_t sizeof_shareds, kmp_routine_entry_t *task_entry,
629   // kmp_int64 device_id);
630   OMPRTL__kmpc_omp_target_task_alloc,
631   // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
632   // new_task);
633   OMPRTL__kmpc_omp_task,
634   // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
635   // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
636   // kmp_int32 didit);
637   OMPRTL__kmpc_copyprivate,
638   // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
639   // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
640   // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
641   OMPRTL__kmpc_reduce,
642   // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
643   // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
644   // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
645   // *lck);
646   OMPRTL__kmpc_reduce_nowait,
647   // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
648   // kmp_critical_name *lck);
649   OMPRTL__kmpc_end_reduce,
650   // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
651   // kmp_critical_name *lck);
652   OMPRTL__kmpc_end_reduce_nowait,
653   // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
654   // kmp_task_t * new_task);
655   OMPRTL__kmpc_omp_task_begin_if0,
656   // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
657   // kmp_task_t * new_task);
658   OMPRTL__kmpc_omp_task_complete_if0,
659   // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
660   OMPRTL__kmpc_ordered,
661   // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
662   OMPRTL__kmpc_end_ordered,
663   // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
664   // global_tid);
665   OMPRTL__kmpc_omp_taskwait,
666   // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
667   OMPRTL__kmpc_taskgroup,
668   // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
669   OMPRTL__kmpc_end_taskgroup,
670   // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
671   // int proc_bind);
672   OMPRTL__kmpc_push_proc_bind,
673   // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
674   // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
675   // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
676   OMPRTL__kmpc_omp_task_with_deps,
677   // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
678   // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
679   // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
680   OMPRTL__kmpc_omp_wait_deps,
681   // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
682   // global_tid, kmp_int32 cncl_kind);
683   OMPRTL__kmpc_cancellationpoint,
684   // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
685   // kmp_int32 cncl_kind);
686   OMPRTL__kmpc_cancel,
687   // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
688   // kmp_int32 num_teams, kmp_int32 thread_limit);
689   OMPRTL__kmpc_push_num_teams,
690   // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
691   // microtask, ...);
692   OMPRTL__kmpc_fork_teams,
693   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
694   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
695   // sched, kmp_uint64 grainsize, void *task_dup);
696   OMPRTL__kmpc_taskloop,
697   // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
698   // num_dims, struct kmp_dim *dims);
699   OMPRTL__kmpc_doacross_init,
700   // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
701   OMPRTL__kmpc_doacross_fini,
702   // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
703   // *vec);
704   OMPRTL__kmpc_doacross_post,
705   // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
706   // *vec);
707   OMPRTL__kmpc_doacross_wait,
708   // Call to void *__kmpc_taskred_init(int gtid, int num_data, void *data);
709   OMPRTL__kmpc_taskred_init,
710   // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
711   // *d);
712   OMPRTL__kmpc_task_reduction_get_th_data,
713   // Call to void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
714   // is_ws, int num, void *data);
715   OMPRTL__kmpc_taskred_modifier_init,
716   // Call to void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid,
717   // int is_ws);
718   OMPRTL__kmpc_task_reduction_modifier_fini,
719   // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
720   OMPRTL__kmpc_alloc,
721   // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
722   OMPRTL__kmpc_free,
723   // Call to omp_allocator_handle_t __kmpc_init_allocator(int gtid,
724   // omp_memspace_handle_t, int ntraits, omp_alloctrait_t traits[]);
725   OMPRTL__kmpc_init_allocator,
726   // Call to void __kmpc_destroy_allocator(int gtid, omp_allocator_handle_t al);
727   OMPRTL__kmpc_destroy_allocator,
728 
729   //
730   // Offloading related calls
731   //
732   // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
733   // size);
734   OMPRTL__kmpc_push_target_tripcount,
735   // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
736   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
737   // *arg_types);
738   OMPRTL__tgt_target,
739   // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
740   // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
741   // *arg_types);
742   OMPRTL__tgt_target_nowait,
743   // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
744   // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
745   // *arg_types, int32_t num_teams, int32_t thread_limit);
746   OMPRTL__tgt_target_teams,
747   // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
748   // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
749   // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
750   OMPRTL__tgt_target_teams_nowait,
751   // Call to void __tgt_register_requires(int64_t flags);
752   OMPRTL__tgt_register_requires,
753   // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
754   // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
755   OMPRTL__tgt_target_data_begin,
756   // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
757   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
758   // *arg_types);
759   OMPRTL__tgt_target_data_begin_nowait,
  // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
762   OMPRTL__tgt_target_data_end,
763   // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
764   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
765   // *arg_types);
766   OMPRTL__tgt_target_data_end_nowait,
767   // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
768   // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
769   OMPRTL__tgt_target_data_update,
770   // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
771   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
772   // *arg_types);
773   OMPRTL__tgt_target_data_update_nowait,
774   // Call to int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
775   OMPRTL__tgt_mapper_num_components,
776   // Call to void __tgt_push_mapper_component(void *rt_mapper_handle, void
777   // *base, void *begin, int64_t size, int64_t type);
778   OMPRTL__tgt_push_mapper_component,
779   // Call to kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
780   // int gtid, kmp_task_t *task);
781   OMPRTL__kmpc_task_allow_completion_event,
782 };
783 
784 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
785 /// region.
786 class CleanupTy final : public EHScopeStack::Cleanup {
787   PrePostActionTy *Action;
788 
789 public:
790   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
791   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
792     if (!CGF.HaveInsertPoint())
793       return;
794     Action->Exit(CGF);
795   }
796 };
797 
798 } // anonymous namespace
799 
800 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
801   CodeGenFunction::RunCleanupsScope Scope(CGF);
802   if (PrePostAction) {
803     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
804     Callback(CodeGen, CGF, *PrePostAction);
805   } else {
806     PrePostActionTy Action;
807     Callback(CodeGen, CGF, Action);
808   }
809 }
810 
811 /// Check if the combiner is a call to UDR combiner and if it is so return the
812 /// UDR decl used for reduction.
813 static const OMPDeclareReductionDecl *
814 getReductionInit(const Expr *ReductionOp) {
815   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
816     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
817       if (const auto *DRE =
818               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
819         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
820           return DRD;
821   return nullptr;
822 }
823 
/// Initialize the private copy of a reduction item that uses a user-defined
/// reduction (UDR).
///
/// \param DRD The 'declare reduction' declaration; must be non-null.
/// \param InitOp Call expression used as the initializer when \p DRD has an
/// initializer clause (its callee is an OpaqueValueExpr and its two arguments
/// are unary operators applied to DeclRefExprs).
/// \param Private Address of the private copy being initialized.
/// \param Original Address of the original item.
/// \param Ty Type of the item; used only on the path without an initializer.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // (combiner, initializer) pair of outlined functions for this UDR.
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Redirect the variable named by the first argument to the private copy
    // and the one named by the second argument to the original item.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // Bind the opaque callee to the outlined initializer function and emit
    // the call, discarding its result.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No initializer clause: copy a null constant of type Ty, materialized as
    // a private constant global, into the private copy.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    // Form an rvalue of the global according to Ty's evaluation kind.
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress(CGF));
      break;
    }
    // Wrap the rvalue in a temporary opaque expression so that it can be
    // stored through the generic expression-to-memory path.
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
875 
/// Emit element-by-element initialization of an array.
///
/// The emitted code first tests whether the destination range is empty and,
/// if not, runs a loop that initializes one element per iteration until the
/// destination pointer reaches the end of the array.
///
/// \param DestAddr Address of the array being initialized.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, each element is initialized via
/// emitInitWithReductionInitializer; otherwise \p Init is emitted directly
/// into each element.
/// \param Init Initial expression of array.
/// \param DRD User-defined reduction declaration, or nullptr. When non-null,
/// a source element pointer is advanced in lock-step with the destination.
/// \param SrcAddr Address of the original array; only used when \p DRD is
/// non-null.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  // Note: the requested element type here is DestAddr's own element type.
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  // Give the source the same element type as the destination.
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Pointer NumElements elements past the start of the destination.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop: an up-front emptiness check
  // followed by a body that tests for completion at its end.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  // EntryBB is the block holding the branch above; it is the first incoming
  // edge of the PHIs created below.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // Current source element (only tracked for user-defined reductions).
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  // Current destination element.
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // The nested scope runs any cleanups created by the initializer before the
  // element pointers are advanced.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): this source-side GEP reuses the "dest.element" value name.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // The back edge comes from whichever block currently ends the body.
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
964 
965 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
966   return CGF.EmitOMPSharedLValue(E);
967 }
968 
969 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
970                                             const Expr *E) {
971   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
972     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
973   return LValue();
974 }
975 
976 void ReductionCodeGen::emitAggregateInitialization(
977     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
978     const OMPDeclareReductionDecl *DRD) {
979   // Emit VarDecl with copy init for arrays.
980   // Get the address of the original variable captured in current
981   // captured region.
982   const auto *PrivateVD =
983       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
984   bool EmitDeclareReductionInit =
985       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
986   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
987                        EmitDeclareReductionInit,
988                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
989                                                 : PrivateVD->getInit(),
990                        DRD, SharedLVal.getAddress(CGF));
991 }
992 
993 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
994                                    ArrayRef<const Expr *> Origs,
995                                    ArrayRef<const Expr *> Privates,
996                                    ArrayRef<const Expr *> ReductionOps) {
997   ClausesData.reserve(Shareds.size());
998   SharedAddresses.reserve(Shareds.size());
999   Sizes.reserve(Shareds.size());
1000   BaseDecls.reserve(Shareds.size());
1001   const auto *IOrig = Origs.begin();
1002   const auto *IPriv = Privates.begin();
1003   const auto *IRed = ReductionOps.begin();
1004   for (const Expr *Ref : Shareds) {
1005     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
1006     std::advance(IOrig, 1);
1007     std::advance(IPriv, 1);
1008     std::advance(IRed, 1);
1009   }
1010 }
1011 
1012 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
1013   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
1014          "Number of generated lvalues must be exactly N.");
1015   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
1016   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
1017   SharedAddresses.emplace_back(First, Second);
1018   if (ClausesData[N].Shared == ClausesData[N].Ref) {
1019     OrigAddresses.emplace_back(First, Second);
1020   } else {
1021     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
1022     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
1023     OrigAddresses.emplace_back(First, Second);
1024   }
1025 }
1026 
/// Compute and record the size of reduction item \p N and, for variably
/// modified private types, emit that type with its array size bound to the
/// computed element count.
///
/// Appends one (size in chars, element count) pair to Sizes. The element
/// count is nullptr when the private type is not variably modified.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  // Types that are not variably modified: record only the byte size of the
  // original item's (non-reference) type.
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  // Pointee type of the original item's address, and its size as a constant
  // expression.
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count = (upper bound - lower bound) + 1; bytes = count * sizeof.
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Bytes come from the type size; element count = bytes / sizeof.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // The private type's array size expression is an opaque value; bind it to
  // the computed element count while the type is emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
1063 
1064 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
1065                                          llvm::Value *Size) {
1066   const auto *PrivateVD =
1067       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1068   QualType PrivateType = PrivateVD->getType();
1069   if (!PrivateType->isVariablyModifiedType()) {
1070     assert(!Size && !Sizes[N].second &&
1071            "Size should be nullptr for non-variably modified reduction "
1072            "items.");
1073     return;
1074   }
1075   CodeGenFunction::OpaqueValueMapping OpaqueMap(
1076       CGF,
1077       cast<OpaqueValueExpr>(
1078           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
1079       RValue::get(Size));
1080   CGF.EmitVariablyModifiedType(PrivateType);
1081 }
1082 
/// Emit initialization of the private copy of reduction item \p N.
///
/// \param PrivateAddr Address of the private copy; it is re-typed to the
/// memory type of the private variable before use.
/// \param SharedLVal LValue of the shared item; it is re-typed to the memory
/// type of the recorded shared lvalue before use.
/// \param DefaultInit Callback consulted only when neither the array path nor
/// the user-defined-reduction path applies. If it returns true, no further
/// initialization is emitted here.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  // Non-null only if the reduction operation is a call to a user-defined
  // reduction.
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  // Rebuild the shared lvalue with the recorded shared type, keeping the
  // base info of the recorded lvalue and TBAA info derived from it.
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Arrays are initialized element by element.
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // User-defined reduction with an initializer clause, or without any
    // initializer on the private variable.
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // The callback declined; emit the private variable's own non-trivial
    // initializer.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
1113 
1114 bool ReductionCodeGen::needCleanups(unsigned N) {
1115   const auto *PrivateVD =
1116       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1117   QualType PrivateType = PrivateVD->getType();
1118   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1119   return DTorKind != QualType::DK_none;
1120 }
1121 
1122 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
1123                                     Address PrivateAddr) {
1124   const auto *PrivateVD =
1125       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1126   QualType PrivateType = PrivateVD->getType();
1127   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1128   if (needCleanups(N)) {
1129     PrivateAddr = CGF.Builder.CreateElementBitCast(
1130         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1131     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
1132   }
1133 }
1134 
/// Starting from \p BaseLV of type \p BaseTy, load through successive pointer
/// or reference levels until the type is neither a pointer nor a reference,
/// or is the same as \p ElTy.
///
/// \returns An lvalue at the reached address, re-typed to the memory type of
/// \p ElTy but keeping the type, base info and TBAA info of the last loaded
/// lvalue.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      // Pointer level: load the pointer and continue from its pointee.
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      // Reference level: view the address as a reference and load through it.
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
1154 
/// Wrap \p Addr in as many levels of indirection as \p BaseTy has on top of
/// \p ElTy.
///
/// For every pointer/reference level of \p BaseTy (until the type equals
/// \p ElTy) a memory temporary is created, and each temporary's address is
/// stored into the temporary of the level above. \p Addr is then cast and
/// stored into the innermost temporary.
///
/// \returns The outermost temporary if at least one level was created;
/// otherwise \p Addr cast to \p BaseLVType with alignment
/// \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  // Tmp: temporary created last (innermost level).
  // TopTmp: temporary of the level processed in the previous iteration.
  // MostTopTmp: temporary created first (outermost level).
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    // Link the new temporary into the one created for the level above.
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  // Cast Addr to what the innermost temporary holds, or to the requested
  // base type when no temporary was needed.
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
1182 
1183 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
1184   const VarDecl *OrigVD = nullptr;
1185   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
1186     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
1187     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
1188       Base = TempOASE->getBase()->IgnoreParenImpCasts();
1189     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1190       Base = TempASE->getBase()->IgnoreParenImpCasts();
1191     DE = cast<DeclRefExpr>(Base);
1192     OrigVD = cast<VarDecl>(DE->getDecl());
1193   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
1194     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
1195     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1196       Base = TempASE->getBase()->IgnoreParenImpCasts();
1197     DE = cast<DeclRefExpr>(Base);
1198     OrigVD = cast<VarDecl>(DE->getDecl());
1199   }
1200   return OrigVD;
1201 }
1202 
1203 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
1204                                                Address PrivateAddr) {
1205   const DeclRefExpr *DE;
1206   if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
1207     BaseDecls.emplace_back(OrigVD);
1208     LValue OriginalBaseLValue = CGF.EmitLValue(DE);
1209     LValue BaseLValue =
1210         loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1211                     OriginalBaseLValue);
1212     llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1213         BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
1214     llvm::Value *PrivatePointer =
1215         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1216             PrivateAddr.getPointer(),
1217             SharedAddresses[N].first.getAddress(CGF).getType());
1218     llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
1219     return castToBase(CGF, OrigVD->getType(),
1220                       SharedAddresses[N].first.getType(),
1221                       OriginalBaseLValue.getAddress(CGF).getType(),
1222                       OriginalBaseLValue.getAlignment(), Ptr);
1223   }
1224   BaseDecls.emplace_back(
1225       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1226   return PrivateAddr;
1227 }
1228 
1229 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1230   const OMPDeclareReductionDecl *DRD =
1231       getReductionInit(ClausesData[N].ReductionOp);
1232   return DRD && DRD->getInitializer();
1233 }
1234 
1235 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1236   return CGF.EmitLoadOfPointerLValue(
1237       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1238       getThreadIDVariable()->getType()->castAs<PointerType>());
1239 }
1240 
1241 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
1242   if (!CGF.HaveInsertPoint())
1243     return;
1244   // 1.2.2 OpenMP Language Terminology
1245   // Structured block - An executable statement with a single entry at the
1246   // top and a single exit at the bottom.
1247   // The point of exit cannot be a branch out of the structured block.
1248   // longjmp() and throw() must not violate the entry/exit criteria.
1249   CGF.EHStack.pushTerminate();
1250   CodeGen(CGF);
1251   CGF.EHStack.popTerminate();
1252 }
1253 
1254 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1255     CodeGenFunction &CGF) {
1256   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1257                             getThreadIDVariable()->getType(),
1258                             AlignmentSource::Decl);
1259 }
1260 
1261 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1262                                        QualType FieldTy) {
1263   auto *Field = FieldDecl::Create(
1264       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1265       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1266       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1267   Field->setAccess(AS_public);
1268   DC->addDecl(Field);
1269   return Field;
1270 }
1271 
1272 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
1273                                  StringRef Separator)
1274     : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
1275       OffloadEntriesInfoManager(CGM) {
1276   ASTContext &C = CGM.getContext();
1277   RecordDecl *RD = C.buildImplicitRecord("ident_t");
1278   QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1279   RD->startDefinition();
1280   // reserved_1
1281   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1282   // flags
1283   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1284   // reserved_2
1285   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1286   // reserved_3
1287   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1288   // psource
1289   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
1290   RD->completeDefinition();
1291   IdentQTy = C.getRecordType(RD);
1292   IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
1293   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1294 
1295   loadOffloadInfoMetadata();
1296 }
1297 
1298 void CGOpenMPRuntime::clear() {
1299   InternalVars.clear();
1300   // Clean non-target variable declarations possibly used only in debug info.
1301   for (const auto &Data : EmittedNonTargetVariables) {
1302     if (!Data.getValue().pointsToAliveValue())
1303       continue;
1304     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1305     if (!GV)
1306       continue;
1307     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1308       continue;
1309     GV->eraseFromParent();
1310   }
1311 }
1312 
1313 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1314   SmallString<128> Buffer;
1315   llvm::raw_svector_ostream OS(Buffer);
1316   StringRef Sep = FirstSeparator;
1317   for (StringRef Part : Parts) {
1318     OS << Sep << Part;
1319     Sep = Separator;
1320   }
1321   return std::string(OS.str());
1322 }
1323 
/// Emit the outlined combiner or initializer function of a user-defined
/// reduction.
///
/// The function has internal linkage, returns void and takes two
/// restrict-qualified pointers to \p Ty: the first corresponds to \p Out and
/// the second to \p In. Inside the body, \p In and \p Out are mapped to the
/// pointees of those parameters.
///
/// \param CombinerInitializer Expression emitted in the body; may be null, in
/// which case no expression is emitted for it.
/// \param In Variable mapped to the pointee of the second parameter.
/// \param Out Variable mapped to the pointee of the first parameter.
/// \param IsCombiner Selects the function name ("omp_combiner" vs.
/// "omp_initializer"). When false, a non-trivial initializer of \p Out is
/// additionally emitted into \p Out before \p CombinerInitializer.
/// \returns The newly created function.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *out, Ty *in);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  // Parameter order is (out, in).
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // In optimized builds, mark the helper always-inline.
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // Initializer function only: emit the non-trivial initializer attached to
  // Out into the (privatized) Out variable.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  // Run the scope's cleanups before the function is finished.
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1380 
1381 void CGOpenMPRuntime::emitUserDefinedReduction(
1382     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1383   if (UDRMap.count(D) > 0)
1384     return;
1385   llvm::Function *Combiner = emitCombinerOrInitializer(
1386       CGM, D->getType(), D->getCombiner(),
1387       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1388       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1389       /*IsCombiner=*/true);
1390   llvm::Function *Initializer = nullptr;
1391   if (const Expr *Init = D->getInitializer()) {
1392     Initializer = emitCombinerOrInitializer(
1393         CGM, D->getType(),
1394         D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1395                                                                      : nullptr,
1396         cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1397         cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1398         /*IsCombiner=*/false);
1399   }
1400   UDRMap.try_emplace(D, Combiner, Initializer);
1401   if (CGF) {
1402     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1403     Decls.second.push_back(D);
1404   }
1405 }
1406 
1407 std::pair<llvm::Function *, llvm::Function *>
1408 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1409   auto I = UDRMap.find(D);
1410   if (I != UDRMap.end())
1411     return I->second;
1412   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1413   return UDRMap.lookup(D);
1414 }
1415 
1416 namespace {
1417 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1418 // Builder if one is present.
1419 struct PushAndPopStackRAII {
1420   PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
1421                       bool HasCancel)
1422       : OMPBuilder(OMPBuilder) {
1423     if (!OMPBuilder)
1424       return;
1425 
1426     // The following callback is the crucial part of clangs cleanup process.
1427     //
1428     // NOTE:
1429     // Once the OpenMPIRBuilder is used to create parallel regions (and
1430     // similar), the cancellation destination (Dest below) is determined via
1431     // IP. That means if we have variables to finalize we split the block at IP,
1432     // use the new block (=BB) as destination to build a JumpDest (via
1433     // getJumpDestInCurrentScope(BB)) which then is fed to
1434     // EmitBranchThroughCleanup. Furthermore, there will not be the need
1435     // to push & pop an FinalizationInfo object.
1436     // The FiniCB will still be needed but at the point where the
1437     // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
1438     auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
1439       assert(IP.getBlock()->end() == IP.getPoint() &&
1440              "Clang CG should cause non-terminated block!");
1441       CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1442       CGF.Builder.restoreIP(IP);
1443       CodeGenFunction::JumpDest Dest =
1444           CGF.getOMPCancelDestination(OMPD_parallel);
1445       CGF.EmitBranchThroughCleanup(Dest);
1446     };
1447 
1448     // TODO: Remove this once we emit parallel regions through the
1449     //       OpenMPIRBuilder as it can do this setup internally.
1450     llvm::OpenMPIRBuilder::FinalizationInfo FI(
1451         {FiniCB, OMPD_parallel, HasCancel});
1452     OMPBuilder->pushFinalizationCB(std::move(FI));
1453   }
1454   ~PushAndPopStackRAII() {
1455     if (OMPBuilder)
1456       OMPBuilder->popFinalizationCB();
1457   }
1458   llvm::OpenMPIRBuilder *OMPBuilder;
1459 };
1460 } // namespace
1461 
1462 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1463     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1464     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1465     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1466   assert(ThreadIDVar->getType()->isPointerType() &&
1467          "thread id variable must be of type kmp_int32 *");
1468   CodeGenFunction CGF(CGM, true);
1469   bool HasCancel = false;
1470   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1471     HasCancel = OPD->hasCancel();
1472   else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1473     HasCancel = OPD->hasCancel();
1474   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1475     HasCancel = OPSD->hasCancel();
1476   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1477     HasCancel = OPFD->hasCancel();
1478   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1479     HasCancel = OPFD->hasCancel();
1480   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1481     HasCancel = OPFD->hasCancel();
1482   else if (const auto *OPFD =
1483                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1484     HasCancel = OPFD->hasCancel();
1485   else if (const auto *OPFD =
1486                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1487     HasCancel = OPFD->hasCancel();
1488 
1489   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1490   //       parallel region to make cancellation barriers work properly.
1491   llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder();
1492   PushAndPopStackRAII PSR(OMPBuilder, CGF, HasCancel);
1493   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1494                                     HasCancel, OutlinedHelperName);
1495   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1496   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1497 }
1498 
1499 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1500     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1501     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1502   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1503   return emitParallelOrTeamsOutlinedFunction(
1504       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1505 }
1506 
1507 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1508     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1509     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1510   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1511   return emitParallelOrTeamsOutlinedFunction(
1512       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1513 }
1514 
1515 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
1516     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1517     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1518     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1519     bool Tied, unsigned &NumberOfParts) {
1520   auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1521                                               PrePostActionTy &) {
1522     llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1523     llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1524     llvm::Value *TaskArgs[] = {
1525         UpLoc, ThreadID,
1526         CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1527                                     TaskTVar->getType()->castAs<PointerType>())
1528             .getPointer(CGF)};
1529     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
1530   };
1531   CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1532                                                             UntiedCodeGen);
1533   CodeGen.setAction(Action);
1534   assert(!ThreadIDVar->getType()->isPointerType() &&
1535          "thread id variable must be of type kmp_int32 for tasks");
1536   const OpenMPDirectiveKind Region =
1537       isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1538                                                       : OMPD_task;
1539   const CapturedStmt *CS = D.getCapturedStmt(Region);
1540   bool HasCancel = false;
1541   if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
1542     HasCancel = TD->hasCancel();
1543   else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
1544     HasCancel = TD->hasCancel();
1545   else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
1546     HasCancel = TD->hasCancel();
1547   else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
1548     HasCancel = TD->hasCancel();
1549 
1550   CodeGenFunction CGF(CGM, true);
1551   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1552                                         InnermostKind, HasCancel, Action);
1553   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1554   llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
1555   if (!Tied)
1556     NumberOfParts = Action.getNumberOfParts();
1557   return Res;
1558 }
1559 
1560 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1561                              const RecordDecl *RD, const CGRecordLayout &RL,
1562                              ArrayRef<llvm::Constant *> Data) {
1563   llvm::StructType *StructTy = RL.getLLVMType();
1564   unsigned PrevIdx = 0;
1565   ConstantInitBuilder CIBuilder(CGM);
1566   auto DI = Data.begin();
1567   for (const FieldDecl *FD : RD->fields()) {
1568     unsigned Idx = RL.getLLVMFieldNo(FD);
1569     // Fill the alignment.
1570     for (unsigned I = PrevIdx; I < Idx; ++I)
1571       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1572     PrevIdx = Idx + 1;
1573     Fields.add(*DI);
1574     ++DI;
1575   }
1576 }
1577 
1578 template <class... As>
1579 static llvm::GlobalVariable *
1580 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1581                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1582                    As &&... Args) {
1583   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1584   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1585   ConstantInitBuilder CIBuilder(CGM);
1586   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1587   buildStructValue(Fields, CGM, RD, RL, Data);
1588   return Fields.finishAndCreateGlobal(
1589       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1590       std::forward<As>(Args)...);
1591 }
1592 
1593 template <typename T>
1594 static void
1595 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1596                                          ArrayRef<llvm::Constant *> Data,
1597                                          T &Parent) {
1598   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1599   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1600   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1601   buildStructValue(Fields, CGM, RD, RL, Data);
1602   Fields.finishAndAddTo(Parent);
1603 }
1604 
1605 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
1606   CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
1607   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1608   FlagsTy FlagsKey(Flags, Reserved2Flags);
1609   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
1610   if (!Entry) {
1611     if (!DefaultOpenMPPSource) {
1612       // Initialize default location for psource field of ident_t structure of
1613       // all ident_t objects. Format is ";file;function;line;column;;".
1614       // Taken from
1615       // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
1616       DefaultOpenMPPSource =
1617           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
1618       DefaultOpenMPPSource =
1619           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
1620     }
1621 
1622     llvm::Constant *Data[] = {
1623         llvm::ConstantInt::getNullValue(CGM.Int32Ty),
1624         llvm::ConstantInt::get(CGM.Int32Ty, Flags),
1625         llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
1626         llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
1627     llvm::GlobalValue *DefaultOpenMPLocation =
1628         createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
1629                            llvm::GlobalValue::PrivateLinkage);
1630     DefaultOpenMPLocation->setUnnamedAddr(
1631         llvm::GlobalValue::UnnamedAddr::Global);
1632 
1633     OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
1634   }
1635   return Address(Entry, Align);
1636 }
1637 
1638 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1639                                              bool AtCurrentPoint) {
1640   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1641   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1642 
1643   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1644   if (AtCurrentPoint) {
1645     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1646         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1647   } else {
1648     Elem.second.ServiceInsertPt =
1649         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1650     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1651   }
1652 }
1653 
1654 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1655   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1656   if (Elem.second.ServiceInsertPt) {
1657     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1658     Elem.second.ServiceInsertPt = nullptr;
1659     Ptr->eraseFromParent();
1660   }
1661 }
1662 
1663 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1664                                                  SourceLocation Loc,
1665                                                  unsigned Flags) {
1666   Flags |= OMP_IDENT_KMPC;
1667   // If no debug info is generated - return global default location.
1668   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1669       Loc.isInvalid())
1670     return getOrCreateDefaultLocation(Flags).getPointer();
1671 
1672   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1673 
1674   CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
1675   Address LocValue = Address::invalid();
1676   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1677   if (I != OpenMPLocThreadIDMap.end())
1678     LocValue = Address(I->second.DebugLoc, Align);
1679 
1680   // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
1681   // GetOpenMPThreadID was called before this routine.
1682   if (!LocValue.isValid()) {
1683     // Generate "ident_t .kmpc_loc.addr;"
1684     Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
1685     auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1686     Elem.second.DebugLoc = AI.getPointer();
1687     LocValue = AI;
1688 
1689     if (!Elem.second.ServiceInsertPt)
1690       setLocThreadIdInsertPt(CGF);
1691     CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1692     CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1693     CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
1694                              CGF.getTypeSize(IdentQTy));
1695   }
1696 
1697   // char **psource = &.kmpc_loc_<flags>.addr.psource;
1698   LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
1699   auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
1700   LValue PSource =
1701       CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));
1702 
1703   llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
1704   if (OMPDebugLoc == nullptr) {
1705     SmallString<128> Buffer2;
1706     llvm::raw_svector_ostream OS2(Buffer2);
1707     // Build debug location
1708     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1709     OS2 << ";" << PLoc.getFilename() << ";";
1710     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1711       OS2 << FD->getQualifiedNameAsString();
1712     OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1713     OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
1714     OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
1715   }
1716   // *psource = ";<File>;<Function>;<Line>;<Column>;;";
1717   CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);
1718 
1719   // Our callers always pass this to a runtime function, so for
1720   // convenience, go ahead and return a naked pointer.
1721   return LocValue.getPointer();
1722 }
1723 
/// Returns the OpenMP global thread id value to use in the current function.
///
/// The lookup proceeds in three steps:
///  1. return the thread id already cached for CGF.CurFn, if any;
///  2. inside an OpenMP region that has a thread id variable, load the id
///     from that variable when the conditions below allow it;
///  3. otherwise emit a call to __kmpc_global_thread_num at the function's
///     service insert point and cache the result.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with thread id passed as
      // argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      // TopBlock is the block that holds the alloca insert point.
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Load from the thread id variable if any of the following holds:
      //  - no landing pad is required, or (C++) exceptions are disabled;
      //  - code is currently being emitted into TopBlock;
      //  - the variable's pointer is not an instruction, or it is an
      //    instruction located in TopBlock or in the current insert block.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region (or the variable could not be
  // used) - need to call
  // kmp_int32 __kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the call at the service insert point; the guard restores the
  // builder's previous position on scope exit.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1780 
1781 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1782   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1783   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1784     clearLocThreadIdInsertPt(CGF);
1785     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1786   }
1787   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1788     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1789       UDRMap.erase(D);
1790     FunctionUDRMap.erase(CGF.CurFn);
1791   }
1792   auto I = FunctionUDMMap.find(CGF.CurFn);
1793   if (I != FunctionUDMMap.end()) {
1794     for(const auto *D : I->second)
1795       UDMMap.erase(D);
1796     FunctionUDMMap.erase(I);
1797   }
1798   LastprivateConditionalToTypes.erase(CGF.CurFn);
1799 }
1800 
1801 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1802   return IdentTy->getPointerTo();
1803 }
1804 
1805 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1806   if (!Kmpc_MicroTy) {
1807     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1808     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1809                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1810     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1811   }
1812   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1813 }
1814 
1815 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1816   llvm::FunctionCallee RTLFn = nullptr;
1817   switch (static_cast<OpenMPRTLFunction>(Function)) {
1818   case OMPRTL__kmpc_fork_call: {
1819     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1820     // microtask, ...);
1821     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1822                                 getKmpc_MicroPointerTy()};
1823     auto *FnTy =
1824         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1825     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1826     if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
1827       if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
1828         llvm::LLVMContext &Ctx = F->getContext();
1829         llvm::MDBuilder MDB(Ctx);
1830         // Annotate the callback behavior of the __kmpc_fork_call:
1831         //  - The callback callee is argument number 2 (microtask).
1832         //  - The first two arguments of the callback callee are unknown (-1).
1833         //  - All variadic arguments to the __kmpc_fork_call are passed to the
1834         //    callback callee.
1835         F->addMetadata(
1836             llvm::LLVMContext::MD_callback,
1837             *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
1838                                         2, {-1, -1},
1839                                         /* VarArgsArePassed */ true)}));
1840       }
1841     }
1842     break;
1843   }
1844   case OMPRTL__kmpc_global_thread_num: {
1845     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1846     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1847     auto *FnTy =
1848         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1849     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1850     break;
1851   }
1852   case OMPRTL__kmpc_threadprivate_cached: {
1853     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1854     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1855     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1856                                 CGM.VoidPtrTy, CGM.SizeTy,
1857                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1858     auto *FnTy =
1859         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1860     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1861     break;
1862   }
1863   case OMPRTL__kmpc_critical: {
1864     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1865     // kmp_critical_name *crit);
1866     llvm::Type *TypeParams[] = {
1867         getIdentTyPointerTy(), CGM.Int32Ty,
1868         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1869     auto *FnTy =
1870         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1871     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1872     break;
1873   }
1874   case OMPRTL__kmpc_critical_with_hint: {
1875     // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1876     // kmp_critical_name *crit, uintptr_t hint);
1877     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1878                                 llvm::PointerType::getUnqual(KmpCriticalNameTy),
1879                                 CGM.IntPtrTy};
1880     auto *FnTy =
1881         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1882     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1883     break;
1884   }
1885   case OMPRTL__kmpc_threadprivate_register: {
1886     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1887     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1888     // typedef void *(*kmpc_ctor)(void *);
1889     auto *KmpcCtorTy =
1890         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1891                                 /*isVarArg*/ false)->getPointerTo();
1892     // typedef void *(*kmpc_cctor)(void *, void *);
1893     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1894     auto *KmpcCopyCtorTy =
1895         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1896                                 /*isVarArg*/ false)
1897             ->getPointerTo();
1898     // typedef void (*kmpc_dtor)(void *);
1899     auto *KmpcDtorTy =
1900         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1901             ->getPointerTo();
1902     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1903                               KmpcCopyCtorTy, KmpcDtorTy};
1904     auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1905                                         /*isVarArg*/ false);
1906     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1907     break;
1908   }
1909   case OMPRTL__kmpc_end_critical: {
1910     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1911     // kmp_critical_name *crit);
1912     llvm::Type *TypeParams[] = {
1913         getIdentTyPointerTy(), CGM.Int32Ty,
1914         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1915     auto *FnTy =
1916         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1917     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1918     break;
1919   }
1920   case OMPRTL__kmpc_cancel_barrier: {
1921     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1922     // global_tid);
1923     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1924     auto *FnTy =
1925         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1926     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1927     break;
1928   }
1929   case OMPRTL__kmpc_barrier: {
1930     // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1931     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1932     auto *FnTy =
1933         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1934     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1935     break;
1936   }
1937   case OMPRTL__kmpc_for_static_fini: {
1938     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1939     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1940     auto *FnTy =
1941         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1942     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1943     break;
1944   }
1945   case OMPRTL__kmpc_push_num_threads: {
1946     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1947     // kmp_int32 num_threads)
1948     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1949                                 CGM.Int32Ty};
1950     auto *FnTy =
1951         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1952     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1953     break;
1954   }
1955   case OMPRTL__kmpc_serialized_parallel: {
1956     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1957     // global_tid);
1958     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1959     auto *FnTy =
1960         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1961     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1962     break;
1963   }
1964   case OMPRTL__kmpc_end_serialized_parallel: {
1965     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1966     // global_tid);
1967     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1968     auto *FnTy =
1969         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1970     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1971     break;
1972   }
1973   case OMPRTL__kmpc_flush: {
1974     // Build void __kmpc_flush(ident_t *loc);
1975     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1976     auto *FnTy =
1977         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1978     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1979     break;
1980   }
1981   case OMPRTL__kmpc_master: {
1982     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1983     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1984     auto *FnTy =
1985         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1986     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
1987     break;
1988   }
1989   case OMPRTL__kmpc_end_master: {
1990     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
1991     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1992     auto *FnTy =
1993         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1994     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
1995     break;
1996   }
1997   case OMPRTL__kmpc_omp_taskyield: {
1998     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
1999     // int end_part);
2000     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2001     auto *FnTy =
2002         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2003     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
2004     break;
2005   }
2006   case OMPRTL__kmpc_single: {
2007     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
2008     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2009     auto *FnTy =
2010         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2011     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
2012     break;
2013   }
2014   case OMPRTL__kmpc_end_single: {
2015     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
2016     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2017     auto *FnTy =
2018         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2019     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
2020     break;
2021   }
2022   case OMPRTL__kmpc_omp_task_alloc: {
2023     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
2024     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
2025     // kmp_routine_entry_t *task_entry);
2026     assert(KmpRoutineEntryPtrTy != nullptr &&
2027            "Type kmp_routine_entry_t must be created.");
2028     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2029                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
2030     // Return void * and then cast to particular kmp_task_t type.
2031     auto *FnTy =
2032         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2033     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
2034     break;
2035   }
2036   case OMPRTL__kmpc_omp_target_task_alloc: {
2037     // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32 gtid,
2038     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
2039     // kmp_routine_entry_t *task_entry, kmp_int64 device_id);
2040     assert(KmpRoutineEntryPtrTy != nullptr &&
2041            "Type kmp_routine_entry_t must be created.");
2042     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2043                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy,
2044                                 CGM.Int64Ty};
2045     // Return void * and then cast to particular kmp_task_t type.
2046     auto *FnTy =
2047         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2048     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_target_task_alloc");
2049     break;
2050   }
2051   case OMPRTL__kmpc_omp_task: {
2052     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2053     // *new_task);
2054     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2055                                 CGM.VoidPtrTy};
2056     auto *FnTy =
2057         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2058     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
2059     break;
2060   }
2061   case OMPRTL__kmpc_copyprivate: {
2062     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
2063     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
2064     // kmp_int32 didit);
2065     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2066     auto *CpyFnTy =
2067         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
2068     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
2069                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
2070                                 CGM.Int32Ty};
2071     auto *FnTy =
2072         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2073     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
2074     break;
2075   }
2076   case OMPRTL__kmpc_reduce: {
2077     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
2078     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
2079     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
2080     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2081     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
2082                                                /*isVarArg=*/false);
2083     llvm::Type *TypeParams[] = {
2084         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
2085         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
2086         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2087     auto *FnTy =
2088         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2089     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
2090     break;
2091   }
2092   case OMPRTL__kmpc_reduce_nowait: {
2093     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
2094     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
2095     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
2096     // *lck);
2097     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2098     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
2099                                                /*isVarArg=*/false);
2100     llvm::Type *TypeParams[] = {
2101         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
2102         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
2103         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2104     auto *FnTy =
2105         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2106     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
2107     break;
2108   }
2109   case OMPRTL__kmpc_end_reduce: {
2110     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
2111     // kmp_critical_name *lck);
2112     llvm::Type *TypeParams[] = {
2113         getIdentTyPointerTy(), CGM.Int32Ty,
2114         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2115     auto *FnTy =
2116         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2117     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
2118     break;
2119   }
2120   case OMPRTL__kmpc_end_reduce_nowait: {
2121     // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
2122     // kmp_critical_name *lck);
2123     llvm::Type *TypeParams[] = {
2124         getIdentTyPointerTy(), CGM.Int32Ty,
2125         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2126     auto *FnTy =
2127         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2128     RTLFn =
2129         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
2130     break;
2131   }
2132   case OMPRTL__kmpc_omp_task_begin_if0: {
    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
2135     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2136                                 CGM.VoidPtrTy};
2137     auto *FnTy =
2138         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2139     RTLFn =
2140         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
2141     break;
2142   }
2143   case OMPRTL__kmpc_omp_task_complete_if0: {
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
2146     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2147                                 CGM.VoidPtrTy};
2148     auto *FnTy =
2149         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2150     RTLFn = CGM.CreateRuntimeFunction(FnTy,
2151                                       /*Name=*/"__kmpc_omp_task_complete_if0");
2152     break;
2153   }
2154   case OMPRTL__kmpc_ordered: {
2155     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
2156     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2157     auto *FnTy =
2158         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2159     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
2160     break;
2161   }
2162   case OMPRTL__kmpc_end_ordered: {
2163     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
2164     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2165     auto *FnTy =
2166         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2167     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
2168     break;
2169   }
2170   case OMPRTL__kmpc_omp_taskwait: {
2171     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
2172     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2173     auto *FnTy =
2174         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2175     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
2176     break;
2177   }
2178   case OMPRTL__kmpc_taskgroup: {
2179     // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
2180     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2181     auto *FnTy =
2182         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2183     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
2184     break;
2185   }
2186   case OMPRTL__kmpc_end_taskgroup: {
2187     // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
2188     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2189     auto *FnTy =
2190         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2191     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
2192     break;
2193   }
2194   case OMPRTL__kmpc_push_proc_bind: {
2195     // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
2196     // int proc_bind)
2197     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2198     auto *FnTy =
2199         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2200     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
2201     break;
2202   }
2203   case OMPRTL__kmpc_omp_task_with_deps: {
2204     // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
2205     // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
2206     // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
2207     llvm::Type *TypeParams[] = {
2208         getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
2209         CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
2210     auto *FnTy =
2211         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2212     RTLFn =
2213         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
2214     break;
2215   }
2216   case OMPRTL__kmpc_omp_wait_deps: {
2217     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
2218     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
2219     // kmp_depend_info_t *noalias_dep_list);
2220     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2221                                 CGM.Int32Ty,           CGM.VoidPtrTy,
2222                                 CGM.Int32Ty,           CGM.VoidPtrTy};
2223     auto *FnTy =
2224         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2225     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
2226     break;
2227   }
2228   case OMPRTL__kmpc_cancellationpoint: {
2229     // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
2230     // global_tid, kmp_int32 cncl_kind)
2231     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2232     auto *FnTy =
2233         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2234     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
2235     break;
2236   }
2237   case OMPRTL__kmpc_cancel: {
2238     // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
2239     // kmp_int32 cncl_kind)
2240     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2241     auto *FnTy =
2242         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2243     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
2244     break;
2245   }
2246   case OMPRTL__kmpc_push_num_teams: {
    // Build void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
    // kmp_int32 num_teams, kmp_int32 num_threads)
2249     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2250         CGM.Int32Ty};
2251     auto *FnTy =
2252         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2253     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
2254     break;
2255   }
2256   case OMPRTL__kmpc_fork_teams: {
2257     // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
2258     // microtask, ...);
2259     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2260                                 getKmpc_MicroPointerTy()};
2261     auto *FnTy =
2262         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
2263     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
2264     if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
2265       if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
2266         llvm::LLVMContext &Ctx = F->getContext();
2267         llvm::MDBuilder MDB(Ctx);
2268         // Annotate the callback behavior of the __kmpc_fork_teams:
2269         //  - The callback callee is argument number 2 (microtask).
2270         //  - The first two arguments of the callback callee are unknown (-1).
2271         //  - All variadic arguments to the __kmpc_fork_teams are passed to the
2272         //    callback callee.
2273         F->addMetadata(
2274             llvm::LLVMContext::MD_callback,
2275             *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
2276                                         2, {-1, -1},
2277                                         /* VarArgsArePassed */ true)}));
2278       }
2279     }
2280     break;
2281   }
2282   case OMPRTL__kmpc_taskloop: {
2283     // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
2284     // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
2285     // sched, kmp_uint64 grainsize, void *task_dup);
2286     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2287                                 CGM.IntTy,
2288                                 CGM.VoidPtrTy,
2289                                 CGM.IntTy,
2290                                 CGM.Int64Ty->getPointerTo(),
2291                                 CGM.Int64Ty->getPointerTo(),
2292                                 CGM.Int64Ty,
2293                                 CGM.IntTy,
2294                                 CGM.IntTy,
2295                                 CGM.Int64Ty,
2296                                 CGM.VoidPtrTy};
2297     auto *FnTy =
2298         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2299     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
2300     break;
2301   }
2302   case OMPRTL__kmpc_doacross_init: {
2303     // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
2304     // num_dims, struct kmp_dim *dims);
2305     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2306                                 CGM.Int32Ty,
2307                                 CGM.Int32Ty,
2308                                 CGM.VoidPtrTy};
2309     auto *FnTy =
2310         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2311     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
2312     break;
2313   }
2314   case OMPRTL__kmpc_doacross_fini: {
2315     // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
2316     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2317     auto *FnTy =
2318         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2319     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
2320     break;
2321   }
2322   case OMPRTL__kmpc_doacross_post: {
2323     // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
2324     // *vec);
2325     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2326                                 CGM.Int64Ty->getPointerTo()};
2327     auto *FnTy =
2328         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2329     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
2330     break;
2331   }
2332   case OMPRTL__kmpc_doacross_wait: {
2333     // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
2334     // *vec);
2335     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2336                                 CGM.Int64Ty->getPointerTo()};
2337     auto *FnTy =
2338         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2339     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
2340     break;
2341   }
2342   case OMPRTL__kmpc_taskred_init: {
2343     // Build void *__kmpc_taskred_init(int gtid, int num_data, void *data);
2344     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
2345     auto *FnTy =
2346         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2347     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskred_init");
2348     break;
2349   }
2350   case OMPRTL__kmpc_task_reduction_get_th_data: {
2351     // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
2352     // *d);
2353     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2354     auto *FnTy =
2355         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2356     RTLFn = CGM.CreateRuntimeFunction(
2357         FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
2358     break;
2359   }
2360   case OMPRTL__kmpc_taskred_modifier_init: {
2361     // Build void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
2362     // is_ws, int num_data, void *data);
2363     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.IntTy, CGM.IntTy,
2364                                 CGM.IntTy, CGM.VoidPtrTy};
2365     auto *FnTy =
2366         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2367     RTLFn = CGM.CreateRuntimeFunction(FnTy,
2368                                       /*Name=*/"__kmpc_taskred_modifier_init");
2369     break;
2370   }
2371   case OMPRTL__kmpc_task_reduction_modifier_fini: {
2372     // Build void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid,
2373     // int is_ws);
2374     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.IntTy, CGM.IntTy};
2375     auto *FnTy =
2376         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2377     RTLFn = CGM.CreateRuntimeFunction(
2378         FnTy,
2379         /*Name=*/"__kmpc_task_reduction_modifier_fini");
2380     break;
2381   }
2382   case OMPRTL__kmpc_alloc: {
2383     // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t
2384     // al); omp_allocator_handle_t type is void *.
2385     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy};
2386     auto *FnTy =
2387         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2388     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc");
2389     break;
2390   }
2391   case OMPRTL__kmpc_free: {
2392     // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t
2393     // al); omp_allocator_handle_t type is void *.
2394     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2395     auto *FnTy =
2396         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2397     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free");
2398     break;
2399   }
2400   case OMPRTL__kmpc_init_allocator: {
2401     // Build omp_allocator_handle_t __kmpc_init_allocator(int gtid,
2402     // omp_memspace_handle_t, int ntraits, omp_alloctrait_t traits[]);
2403     // omp_allocator_handle_t type is void*, omp_memspace_handle_t type is
2404     // void*.
2405     auto *FnTy = llvm::FunctionType::get(
2406         CGM.VoidPtrTy, {CGM.IntTy, CGM.VoidPtrTy, CGM.IntTy, CGM.VoidPtrTy},
2407         /*isVarArg=*/false);
2408     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_init_allocator");
2409     break;
2410   }
2411   case OMPRTL__kmpc_destroy_allocator: {
2412     // Build void __kmpc_destroy_allocator(int gtid, omp_allocator_handle_t al);
2413     // omp_allocator_handle_t type is void*.
2414     auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, {CGM.IntTy, CGM.VoidPtrTy},
2415                                          /*isVarArg=*/false);
2416     RTLFn =
2417         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_destroy_allocator");
2418     break;
2419   }
2420   case OMPRTL__kmpc_push_target_tripcount: {
2421     // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
2422     // size);
2423     llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty};
2424     llvm::FunctionType *FnTy =
2425         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2426     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount");
2427     break;
2428   }
2429   case OMPRTL__tgt_target: {
2430     // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
2431     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2432     // *arg_types);
2433     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2434                                 CGM.VoidPtrTy,
2435                                 CGM.Int32Ty,
2436                                 CGM.VoidPtrPtrTy,
2437                                 CGM.VoidPtrPtrTy,
2438                                 CGM.Int64Ty->getPointerTo(),
2439                                 CGM.Int64Ty->getPointerTo()};
2440     auto *FnTy =
2441         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2442     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
2443     break;
2444   }
2445   case OMPRTL__tgt_target_nowait: {
2446     // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
2447     // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2448     // int64_t *arg_types);
2449     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2450                                 CGM.VoidPtrTy,
2451                                 CGM.Int32Ty,
2452                                 CGM.VoidPtrPtrTy,
2453                                 CGM.VoidPtrPtrTy,
2454                                 CGM.Int64Ty->getPointerTo(),
2455                                 CGM.Int64Ty->getPointerTo()};
2456     auto *FnTy =
2457         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2458     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
2459     break;
2460   }
2461   case OMPRTL__tgt_target_teams: {
2462     // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
2463     // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2464     // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2465     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2466                                 CGM.VoidPtrTy,
2467                                 CGM.Int32Ty,
2468                                 CGM.VoidPtrPtrTy,
2469                                 CGM.VoidPtrPtrTy,
2470                                 CGM.Int64Ty->getPointerTo(),
2471                                 CGM.Int64Ty->getPointerTo(),
2472                                 CGM.Int32Ty,
2473                                 CGM.Int32Ty};
2474     auto *FnTy =
2475         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2476     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
2477     break;
2478   }
2479   case OMPRTL__tgt_target_teams_nowait: {
2480     // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
2481     // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
2482     // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2483     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2484                                 CGM.VoidPtrTy,
2485                                 CGM.Int32Ty,
2486                                 CGM.VoidPtrPtrTy,
2487                                 CGM.VoidPtrPtrTy,
2488                                 CGM.Int64Ty->getPointerTo(),
2489                                 CGM.Int64Ty->getPointerTo(),
2490                                 CGM.Int32Ty,
2491                                 CGM.Int32Ty};
2492     auto *FnTy =
2493         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2494     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
2495     break;
2496   }
2497   case OMPRTL__tgt_register_requires: {
2498     // Build void __tgt_register_requires(int64_t flags);
2499     llvm::Type *TypeParams[] = {CGM.Int64Ty};
2500     auto *FnTy =
2501         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2502     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires");
2503     break;
2504   }
2505   case OMPRTL__tgt_target_data_begin: {
2506     // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
2507     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2508     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2509                                 CGM.Int32Ty,
2510                                 CGM.VoidPtrPtrTy,
2511                                 CGM.VoidPtrPtrTy,
2512                                 CGM.Int64Ty->getPointerTo(),
2513                                 CGM.Int64Ty->getPointerTo()};
2514     auto *FnTy =
2515         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2516     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
2517     break;
2518   }
2519   case OMPRTL__tgt_target_data_begin_nowait: {
2520     // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
2521     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2522     // *arg_types);
2523     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2524                                 CGM.Int32Ty,
2525                                 CGM.VoidPtrPtrTy,
2526                                 CGM.VoidPtrPtrTy,
2527                                 CGM.Int64Ty->getPointerTo(),
2528                                 CGM.Int64Ty->getPointerTo()};
2529     auto *FnTy =
2530         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2531     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
2532     break;
2533   }
2534   case OMPRTL__tgt_target_data_end: {
2535     // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
2536     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2537     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2538                                 CGM.Int32Ty,
2539                                 CGM.VoidPtrPtrTy,
2540                                 CGM.VoidPtrPtrTy,
2541                                 CGM.Int64Ty->getPointerTo(),
2542                                 CGM.Int64Ty->getPointerTo()};
2543     auto *FnTy =
2544         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2545     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
2546     break;
2547   }
2548   case OMPRTL__tgt_target_data_end_nowait: {
2549     // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
2550     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2551     // *arg_types);
2552     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2553                                 CGM.Int32Ty,
2554                                 CGM.VoidPtrPtrTy,
2555                                 CGM.VoidPtrPtrTy,
2556                                 CGM.Int64Ty->getPointerTo(),
2557                                 CGM.Int64Ty->getPointerTo()};
2558     auto *FnTy =
2559         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2560     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
2561     break;
2562   }
2563   case OMPRTL__tgt_target_data_update: {
2564     // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
2565     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2566     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2567                                 CGM.Int32Ty,
2568                                 CGM.VoidPtrPtrTy,
2569                                 CGM.VoidPtrPtrTy,
2570                                 CGM.Int64Ty->getPointerTo(),
2571                                 CGM.Int64Ty->getPointerTo()};
2572     auto *FnTy =
2573         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2574     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
2575     break;
2576   }
2577   case OMPRTL__tgt_target_data_update_nowait: {
2578     // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
2579     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2580     // *arg_types);
2581     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2582                                 CGM.Int32Ty,
2583                                 CGM.VoidPtrPtrTy,
2584                                 CGM.VoidPtrPtrTy,
2585                                 CGM.Int64Ty->getPointerTo(),
2586                                 CGM.Int64Ty->getPointerTo()};
2587     auto *FnTy =
2588         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2589     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
2590     break;
2591   }
2592   case OMPRTL__tgt_mapper_num_components: {
2593     // Build int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
2594     llvm::Type *TypeParams[] = {CGM.VoidPtrTy};
2595     auto *FnTy =
2596         llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false);
2597     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_mapper_num_components");
2598     break;
2599   }
2600   case OMPRTL__tgt_push_mapper_component: {
2601     // Build void __tgt_push_mapper_component(void *rt_mapper_handle, void
2602     // *base, void *begin, int64_t size, int64_t type);
2603     llvm::Type *TypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.VoidPtrTy,
2604                                 CGM.Int64Ty, CGM.Int64Ty};
2605     auto *FnTy =
2606         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2607     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_push_mapper_component");
2608     break;
2609   }
2610   case OMPRTL__kmpc_task_allow_completion_event: {
2611     // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
2612     // int gtid, kmp_task_t *task);
2613     auto *FnTy = llvm::FunctionType::get(
2614         CGM.VoidPtrTy, {getIdentTyPointerTy(), CGM.IntTy, CGM.VoidPtrTy},
2615         /*isVarArg=*/false);
2616     RTLFn =
2617         CGM.CreateRuntimeFunction(FnTy, "__kmpc_task_allow_completion_event");
2618     break;
2619   }
2620   }
2621   assert(RTLFn && "Unable to find OpenMP runtime function");
2622   return RTLFn;
2623 }
2624 
2625 llvm::FunctionCallee
2626 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
2627   assert((IVSize == 32 || IVSize == 64) &&
2628          "IV size is not compatible with the omp runtime");
2629   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
2630                                             : "__kmpc_for_static_init_4u")
2631                                 : (IVSigned ? "__kmpc_for_static_init_8"
2632                                             : "__kmpc_for_static_init_8u");
2633   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2634   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2635   llvm::Type *TypeParams[] = {
2636     getIdentTyPointerTy(),                     // loc
2637     CGM.Int32Ty,                               // tid
2638     CGM.Int32Ty,                               // schedtype
2639     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2640     PtrTy,                                     // p_lower
2641     PtrTy,                                     // p_upper
2642     PtrTy,                                     // p_stride
2643     ITy,                                       // incr
2644     ITy                                        // chunk
2645   };
2646   auto *FnTy =
2647       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2648   return CGM.CreateRuntimeFunction(FnTy, Name);
2649 }
2650 
2651 llvm::FunctionCallee
2652 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
2653   assert((IVSize == 32 || IVSize == 64) &&
2654          "IV size is not compatible with the omp runtime");
2655   StringRef Name =
2656       IVSize == 32
2657           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
2658           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
2659   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2660   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
2661                                CGM.Int32Ty,           // tid
2662                                CGM.Int32Ty,           // schedtype
2663                                ITy,                   // lower
2664                                ITy,                   // upper
2665                                ITy,                   // stride
2666                                ITy                    // chunk
2667   };
2668   auto *FnTy =
2669       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2670   return CGM.CreateRuntimeFunction(FnTy, Name);
2671 }
2672 
2673 llvm::FunctionCallee
2674 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
2675   assert((IVSize == 32 || IVSize == 64) &&
2676          "IV size is not compatible with the omp runtime");
2677   StringRef Name =
2678       IVSize == 32
2679           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
2680           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
2681   llvm::Type *TypeParams[] = {
2682       getIdentTyPointerTy(), // loc
2683       CGM.Int32Ty,           // tid
2684   };
2685   auto *FnTy =
2686       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2687   return CGM.CreateRuntimeFunction(FnTy, Name);
2688 }
2689 
2690 llvm::FunctionCallee
2691 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
2692   assert((IVSize == 32 || IVSize == 64) &&
2693          "IV size is not compatible with the omp runtime");
2694   StringRef Name =
2695       IVSize == 32
2696           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
2697           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
2698   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2699   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2700   llvm::Type *TypeParams[] = {
2701     getIdentTyPointerTy(),                     // loc
2702     CGM.Int32Ty,                               // tid
2703     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2704     PtrTy,                                     // p_lower
2705     PtrTy,                                     // p_upper
2706     PtrTy                                      // p_stride
2707   };
2708   auto *FnTy =
2709       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2710   return CGM.CreateRuntimeFunction(FnTy, Name);
2711 }
2712 
2713 /// Obtain information that uniquely identifies a target entry. This
2714 /// consists of the file and device IDs as well as line number associated with
2715 /// the relevant entry source location.
2716 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
2717                                      unsigned &DeviceID, unsigned &FileID,
2718                                      unsigned &LineNum) {
2719   SourceManager &SM = C.getSourceManager();
2720 
2721   // The loc should be always valid and have a file ID (the user cannot use
2722   // #pragma directives in macros)
2723 
2724   assert(Loc.isValid() && "Source location is expected to be always valid.");
2725 
2726   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
2727   assert(PLoc.isValid() && "Source location is expected to be always valid.");
2728 
2729   llvm::sys::fs::UniqueID ID;
2730   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
2731     SM.getDiagnostics().Report(diag::err_cannot_open_file)
2732         << PLoc.getFilename() << EC.message();
2733 
2734   DeviceID = ID.getDevice();
2735   FileID = ID.getFile();
2736   LineNum = PLoc.getLine();
2737 }
2738 
2739 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
2740   if (CGM.getLangOpts().OpenMPSimd)
2741     return Address::invalid();
2742   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
2743       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
2744   if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
2745               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
2746                HasRequiresUnifiedSharedMemory))) {
2747     SmallString<64> PtrName;
2748     {
2749       llvm::raw_svector_ostream OS(PtrName);
2750       OS << CGM.getMangledName(GlobalDecl(VD));
2751       if (!VD->isExternallyVisible()) {
2752         unsigned DeviceID, FileID, Line;
2753         getTargetEntryUniqueInfo(CGM.getContext(),
2754                                  VD->getCanonicalDecl()->getBeginLoc(),
2755                                  DeviceID, FileID, Line);
2756         OS << llvm::format("_%x", FileID);
2757       }
2758       OS << "_decl_tgt_ref_ptr";
2759     }
2760     llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
2761     if (!Ptr) {
2762       QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
2763       Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
2764                                         PtrName);
2765 
2766       auto *GV = cast<llvm::GlobalVariable>(Ptr);
2767       GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
2768 
2769       if (!CGM.getLangOpts().OpenMPIsDevice)
2770         GV->setInitializer(CGM.GetAddrOfGlobal(VD));
2771       registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
2772     }
2773     return Address(Ptr, CGM.getContext().getDeclAlign(VD));
2774   }
2775   return Address::invalid();
2776 }
2777 
2778 llvm::Constant *
2779 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
2780   assert(!CGM.getLangOpts().OpenMPUseTLS ||
2781          !CGM.getContext().getTargetInfo().isTLSSupported());
2782   // Lookup the entry, lazily creating it if necessary.
2783   std::string Suffix = getName({"cache", ""});
2784   return getOrCreateInternalVariable(
2785       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
2786 }
2787 
2788 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
2789                                                 const VarDecl *VD,
2790                                                 Address VDAddr,
2791                                                 SourceLocation Loc) {
2792   if (CGM.getLangOpts().OpenMPUseTLS &&
2793       CGM.getContext().getTargetInfo().isTLSSupported())
2794     return VDAddr;
2795 
2796   llvm::Type *VarTy = VDAddr.getElementType();
2797   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2798                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2799                                                        CGM.Int8PtrTy),
2800                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
2801                          getOrCreateThreadPrivateCache(VD)};
2802   return Address(CGF.EmitRuntimeCall(
2803       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2804                  VDAddr.getAlignment());
2805 }
2806 
2807 void CGOpenMPRuntime::emitThreadPrivateVarInit(
2808     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
2809     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
2810   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
2811   // library.
2812   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
2813   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
2814                       OMPLoc);
2815   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
2816   // to register constructor/destructor for variable.
2817   llvm::Value *Args[] = {
2818       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
2819       Ctor, CopyCtor, Dtor};
2820   CGF.EmitRuntimeCall(
2821       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
2822 }
2823 
/// Emits the constructor/destructor helper functions for the threadprivate
/// variable \p VD and the code that registers them with the runtime.
///
/// \param VD          Threadprivate variable; its definition is looked up
///                    here.
/// \param VDAddr      Address of the original variable; passed to the
///                    registration call and used for the alignment of the
///                    helper arguments.
/// \param Loc         Location used for the generated functions and calls.
/// \param PerformInit When true (and compiling C++), a helper that re-emits
///                    the initializer is generated.
/// \param CGF         If non-null, the registration code is emitted into this
///                    function instead of a new standalone init function.
/// \returns The newly created "__omp_threadprivate_init_" function when
///          \p CGF is null and registration is required. Returns nullptr in
///          all other cases: TLS is used, \p VD has no definition or was
///          already handled, no helper is needed, or the code was emitted
///          into \p CGF.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // Nothing to register when threadprivate variables are implemented with TLS.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Process each defined variable only once, keyed by its mangled name.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    // NOTE(review): Init is dereferenced unconditionally below; presumably
    // callers pass PerformInit only for variables with an initializer -
    // confirm.
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // Single unnamed 'void *' parameter: the storage to initialize.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      // Load the incoming pointer and view it as a pointer to the variable's
      // type so the initializer can be emitted into it.
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The helper returns the pointer it was given: reload the argument and
      // store it into the return slot.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // Single unnamed 'void *' parameter: the storage to destroy.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      // NL and AL are scope objects: they must stay alive until the end of
      // this block so that their debug-location settings cover the emitted
      // code.
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    // Type of the copy constructor slot: void *(*)(void *, void *).
    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Replace a missing helper with a typed null function pointer so that all
    // registration arguments are always present.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No function to emit into: wrap the registration in a standalone
      // nullary init function and hand it back to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration directly into the provided function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
2943 
/// Emits (on the device) or reserves (on the host) the constructor and
/// destructor offload entries for the declare target variable \p VD.
///
/// \param VD          Declare target variable; its definition is looked up
///                    here.
/// \param Addr        Global variable holding \p VD; the device-side helpers
///                    initialize/destroy the object at this address.
/// \param PerformInit When true (and compiling C++), a constructor entry is
///                    produced.
/// \returns false when there are no offload target triples and this is not a
///          device compilation; otherwise the value of the OpenMPIsDevice
///          language option.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // Nothing is emitted for variables that are not declare target, are mapped
  // with 'link', or are mapped with 'to' under unified shared memory.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Process each variable only once, keyed by its mangled name.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    // Resulting prefix: "__omp_offloading_" "_<device>" "_<file>_" <name>
    // "_l<line>", with the IDs printed in hexadecimal. Both format strings
    // start with '_', so two underscores follow "offloading".
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  // NOTE(review): Init is dereferenced unconditionally in the device branch
  // below; presumably PerformInit implies an initializer exists - confirm.
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate a nullary function that re-emits the declaration's
      // initializer into the storage of the variable VD at Addr.
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      // NL and AL are scope objects that must stay alive until the end of
      // this block.
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Add the function to the module's used globals.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // Not a device compilation: no function is generated here; a private
      // constant i8 global with the same "_ctor" name stands in as the entry
      // address and ID.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate a nullary function that emits the destructor call for the
      // variable VD stored at Addr.
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Add the function to the module's used globals.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Not a device compilation: a private constant i8 global with the same
      // "_dtor" name stands in as the entry address and ID.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
3058 
3059 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
3060                                                           QualType VarType,
3061                                                           StringRef Name) {
3062   std::string Suffix = getName({"artificial", ""});
3063   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
3064   llvm::Value *GAddr =
3065       getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
3066   if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
3067       CGM.getTarget().isTLSSupported()) {
3068     cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
3069     return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
3070   }
3071   std::string CacheSuffix = getName({"cache", ""});
3072   llvm::Value *Args[] = {
3073       emitUpdateLocation(CGF, SourceLocation()),
3074       getThreadID(CGF, SourceLocation()),
3075       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
3076       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
3077                                 /*isSigned=*/false),
3078       getOrCreateInternalVariable(
3079           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
3080   return Address(
3081       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3082           CGF.EmitRuntimeCall(
3083               createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
3084           VarLVType->getPointerTo(/*AddrSpace=*/0)),
3085       CGM.getContext().getTypeAlignInChars(VarType));
3086 }
3087 
3088 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
3089                                    const RegionCodeGenTy &ThenGen,
3090                                    const RegionCodeGenTy &ElseGen) {
3091   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
3092 
3093   // If the condition constant folds and can be elided, try to avoid emitting
3094   // the condition and the dead arm of the if/else.
3095   bool CondConstant;
3096   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
3097     if (CondConstant)
3098       ThenGen(CGF);
3099     else
3100       ElseGen(CGF);
3101     return;
3102   }
3103 
3104   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
3105   // emit the conditional branch.
3106   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
3107   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
3108   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
3109   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
3110 
3111   // Emit the 'then' code.
3112   CGF.EmitBlock(ThenBlock);
3113   ThenGen(CGF);
3114   CGF.EmitBranch(ContBlock);
3115   // Emit the 'else' code if present.
3116   // There is no need to emit line number for unconditional branch.
3117   (void)ApplyDebugLocation::CreateEmpty(CGF);
3118   CGF.EmitBlock(ElseBlock);
3119   ElseGen(CGF);
3120   // There is no need to emit line number for unconditional branch.
3121   (void)ApplyDebugLocation::CreateEmpty(CGF);
3122   CGF.EmitBranch(ContBlock);
3123   // Emit the continuation block for code after the if.
3124   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
3125 }
3126 
3127 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
3128                                        llvm::Function *OutlinedFn,
3129                                        ArrayRef<llvm::Value *> CapturedVars,
3130                                        const Expr *IfCond) {
3131   if (!CGF.HaveInsertPoint())
3132     return;
3133   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
3134   auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
3135                                                      PrePostActionTy &) {
3136     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
3137     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
3138     llvm::Value *Args[] = {
3139         RTLoc,
3140         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
3141         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
3142     llvm::SmallVector<llvm::Value *, 16> RealArgs;
3143     RealArgs.append(std::begin(Args), std::end(Args));
3144     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
3145 
3146     llvm::FunctionCallee RTLFn =
3147         RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
3148     CGF.EmitRuntimeCall(RTLFn, RealArgs);
3149   };
3150   auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
3151                                                           PrePostActionTy &) {
3152     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
3153     llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
3154     // Build calls:
3155     // __kmpc_serialized_parallel(&Loc, GTid);
3156     llvm::Value *Args[] = {RTLoc, ThreadID};
3157     CGF.EmitRuntimeCall(
3158         RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
3159 
3160     // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
3161     Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
3162     Address ZeroAddrBound =
3163         CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
3164                                          /*Name=*/".bound.zero.addr");
3165     CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
3166     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
3167     // ThreadId for serialized parallels is 0.
3168     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
3169     OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
3170     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
3171     RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
3172 
3173     // __kmpc_end_serialized_parallel(&Loc, GTid);
3174     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
3175     CGF.EmitRuntimeCall(
3176         RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
3177         EndArgs);
3178   };
3179   if (IfCond) {
3180     emitIfClause(CGF, IfCond, ThenGen, ElseGen);
3181   } else {
3182     RegionCodeGenTy ThenRCG(ThenGen);
3183     ThenRCG(CGF);
3184   }
3185 }
3186 
3187 // If we're inside an (outlined) parallel region, use the region info's
3188 // thread-ID variable (it is passed in a first argument of the outlined function
3189 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
3190 // regular serial code region, get thread ID by calling kmp_int32
3191 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
3192 // return the address of that temp.
3193 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
3194                                              SourceLocation Loc) {
3195   if (auto *OMPRegionInfo =
3196           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3197     if (OMPRegionInfo->getThreadIDVariable())
3198       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
3199 
3200   llvm::Value *ThreadID = getThreadID(CGF, Loc);
3201   QualType Int32Ty =
3202       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
3203   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
3204   CGF.EmitStoreOfScalar(ThreadID,
3205                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
3206 
3207   return ThreadIDTemp;
3208 }
3209 
3210 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
3211     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
3212   SmallString<256> Buffer;
3213   llvm::raw_svector_ostream Out(Buffer);
3214   Out << Name;
3215   StringRef RuntimeName = Out.str();
3216   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
3217   if (Elem.second) {
3218     assert(Elem.second->getType()->getPointerElementType() == Ty &&
3219            "OMP internal variable has different type than requested");
3220     return &*Elem.second;
3221   }
3222 
3223   return Elem.second = new llvm::GlobalVariable(
3224              CGM.getModule(), Ty, /*IsConstant*/ false,
3225              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
3226              Elem.first(), /*InsertBefore=*/nullptr,
3227              llvm::GlobalValue::NotThreadLocal, AddressSpace);
3228 }
3229 
3230 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
3231   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
3232   std::string Name = getName({Prefix, "var"});
3233   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
3234 }
3235 
3236 namespace {
3237 /// Common pre(post)-action for different OpenMP constructs.
3238 class CommonActionTy final : public PrePostActionTy {
3239   llvm::FunctionCallee EnterCallee;
3240   ArrayRef<llvm::Value *> EnterArgs;
3241   llvm::FunctionCallee ExitCallee;
3242   ArrayRef<llvm::Value *> ExitArgs;
3243   bool Conditional;
3244   llvm::BasicBlock *ContBlock = nullptr;
3245 
3246 public:
3247   CommonActionTy(llvm::FunctionCallee EnterCallee,
3248                  ArrayRef<llvm::Value *> EnterArgs,
3249                  llvm::FunctionCallee ExitCallee,
3250                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
3251       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
3252         ExitArgs(ExitArgs), Conditional(Conditional) {}
3253   void Enter(CodeGenFunction &CGF) override {
3254     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
3255     if (Conditional) {
3256       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
3257       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
3258       ContBlock = CGF.createBasicBlock("omp_if.end");
3259       // Generate the branch (If-stmt)
3260       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
3261       CGF.EmitBlock(ThenBlock);
3262     }
3263   }
3264   void Done(CodeGenFunction &CGF) {
3265     // Emit the rest of blocks/branches
3266     CGF.EmitBranch(ContBlock);
3267     CGF.EmitBlock(ContBlock, true);
3268   }
3269   void Exit(CodeGenFunction &CGF) override {
3270     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
3271   }
3272 };
3273 } // anonymous namespace
3274 
3275 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
3276                                          StringRef CriticalName,
3277                                          const RegionCodeGenTy &CriticalOpGen,
3278                                          SourceLocation Loc, const Expr *Hint) {
3279   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
3280   // CriticalOpGen();
3281   // __kmpc_end_critical(ident_t *, gtid, Lock);
3282   // Prepare arguments and build a call to __kmpc_critical
3283   if (!CGF.HaveInsertPoint())
3284     return;
3285   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3286                          getCriticalRegionLock(CriticalName)};
3287   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
3288                                                 std::end(Args));
3289   if (Hint) {
3290     EnterArgs.push_back(CGF.Builder.CreateIntCast(
3291         CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
3292   }
3293   CommonActionTy Action(
3294       createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
3295                                  : OMPRTL__kmpc_critical),
3296       EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
3297   CriticalOpGen.setAction(Action);
3298   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
3299 }
3300 
3301 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
3302                                        const RegionCodeGenTy &MasterOpGen,
3303                                        SourceLocation Loc) {
3304   if (!CGF.HaveInsertPoint())
3305     return;
3306   // if(__kmpc_master(ident_t *, gtid)) {
3307   //   MasterOpGen();
3308   //   __kmpc_end_master(ident_t *, gtid);
3309   // }
3310   // Prepare arguments and build a call to __kmpc_master
3311   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3312   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
3313                         createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
3314                         /*Conditional=*/true);
3315   MasterOpGen.setAction(Action);
3316   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
3317   Action.Done(CGF);
3318 }
3319 
3320 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
3321                                         SourceLocation Loc) {
3322   if (!CGF.HaveInsertPoint())
3323     return;
3324   llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
3325   if (OMPBuilder) {
3326     OMPBuilder->CreateTaskyield(CGF.Builder);
3327   } else {
3328     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
3329     llvm::Value *Args[] = {
3330         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3331         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
3332     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield),
3333                         Args);
3334   }
3335 
3336   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3337     Region->emitUntiedSwitch(CGF);
3338 }
3339 
3340 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
3341                                           const RegionCodeGenTy &TaskgroupOpGen,
3342                                           SourceLocation Loc) {
3343   if (!CGF.HaveInsertPoint())
3344     return;
3345   // __kmpc_taskgroup(ident_t *, gtid);
3346   // TaskgroupOpGen();
3347   // __kmpc_end_taskgroup(ident_t *, gtid);
3348   // Prepare arguments and build a call to __kmpc_taskgroup
3349   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3350   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
3351                         createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
3352                         Args);
3353   TaskgroupOpGen.setAction(Action);
3354   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
3355 }
3356 
3357 /// Given an array of pointers to variables, project the address of a
3358 /// given variable.
3359 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
3360                                       unsigned Index, const VarDecl *Var) {
3361   // Pull out the pointer to the variable.
3362   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
3363   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
3364 
3365   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
3366   Addr = CGF.Builder.CreateElementBitCast(
3367       Addr, CGF.ConvertTypeForMem(Var->getType()));
3368   return Addr;
3369 }
3370 
3371 static llvm::Value *emitCopyprivateCopyFunction(
3372     CodeGenModule &CGM, llvm::Type *ArgsType,
3373     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
3374     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
3375     SourceLocation Loc) {
3376   ASTContext &C = CGM.getContext();
3377   // void copy_func(void *LHSArg, void *RHSArg);
3378   FunctionArgList Args;
3379   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
3380                            ImplicitParamDecl::Other);
3381   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
3382                            ImplicitParamDecl::Other);
3383   Args.push_back(&LHSArg);
3384   Args.push_back(&RHSArg);
3385   const auto &CGFI =
3386       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3387   std::string Name =
3388       CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
3389   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
3390                                     llvm::GlobalValue::InternalLinkage, Name,
3391                                     &CGM.getModule());
3392   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
3393   Fn->setDoesNotRecurse();
3394   CodeGenFunction CGF(CGM);
3395   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
3396   // Dest = (void*[n])(LHSArg);
3397   // Src = (void*[n])(RHSArg);
3398   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3399       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
3400       ArgsType), CGF.getPointerAlign());
3401   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3402       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
3403       ArgsType), CGF.getPointerAlign());
3404   // *(Type0*)Dst[0] = *(Type0*)Src[0];
3405   // *(Type1*)Dst[1] = *(Type1*)Src[1];
3406   // ...
3407   // *(Typen*)Dst[n] = *(Typen*)Src[n];
3408   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
3409     const auto *DestVar =
3410         cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
3411     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
3412 
3413     const auto *SrcVar =
3414         cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
3415     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
3416 
3417     const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
3418     QualType Type = VD->getType();
3419     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
3420   }
3421   CGF.FinishFunction();
3422   return Fn;
3423 }
3424 
3425 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
3426                                        const RegionCodeGenTy &SingleOpGen,
3427                                        SourceLocation Loc,
3428                                        ArrayRef<const Expr *> CopyprivateVars,
3429                                        ArrayRef<const Expr *> SrcExprs,
3430                                        ArrayRef<const Expr *> DstExprs,
3431                                        ArrayRef<const Expr *> AssignmentOps) {
3432   if (!CGF.HaveInsertPoint())
3433     return;
3434   assert(CopyprivateVars.size() == SrcExprs.size() &&
3435          CopyprivateVars.size() == DstExprs.size() &&
3436          CopyprivateVars.size() == AssignmentOps.size());
3437   ASTContext &C = CGM.getContext();
3438   // int32 did_it = 0;
3439   // if(__kmpc_single(ident_t *, gtid)) {
3440   //   SingleOpGen();
3441   //   __kmpc_end_single(ident_t *, gtid);
3442   //   did_it = 1;
3443   // }
3444   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
3445   // <copy_func>, did_it);
3446 
3447   Address DidIt = Address::invalid();
3448   if (!CopyprivateVars.empty()) {
3449     // int32 did_it = 0;
3450     QualType KmpInt32Ty =
3451         C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3452     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
3453     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
3454   }
3455   // Prepare arguments and build a call to __kmpc_single
3456   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3457   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
3458                         createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
3459                         /*Conditional=*/true);
3460   SingleOpGen.setAction(Action);
3461   emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
3462   if (DidIt.isValid()) {
3463     // did_it = 1;
3464     CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
3465   }
3466   Action.Done(CGF);
3467   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
3468   // <copy_func>, did_it);
3469   if (DidIt.isValid()) {
3470     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
3471     QualType CopyprivateArrayTy = C.getConstantArrayType(
3472         C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
3473         /*IndexTypeQuals=*/0);
3474     // Create a list of all private variables for copyprivate.
3475     Address CopyprivateList =
3476         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
3477     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
3478       Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
3479       CGF.Builder.CreateStore(
3480           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3481               CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
3482               CGF.VoidPtrTy),
3483           Elem);
3484     }
3485     // Build function that copies private values from single region to all other
3486     // threads in the corresponding parallel region.
3487     llvm::Value *CpyFn = emitCopyprivateCopyFunction(
3488         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
3489         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
3490     llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
3491     Address CL =
3492       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
3493                                                       CGF.VoidPtrTy);
3494     llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
3495     llvm::Value *Args[] = {
3496         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
3497         getThreadID(CGF, Loc),        // i32 <gtid>
3498         BufSize,                      // size_t <buf_size>
3499         CL.getPointer(),              // void *<copyprivate list>
3500         CpyFn,                        // void (*) (void *, void *) <copy_func>
3501         DidItVal                      // i32 did_it
3502     };
3503     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
3504   }
3505 }
3506 
3507 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
3508                                         const RegionCodeGenTy &OrderedOpGen,
3509                                         SourceLocation Loc, bool IsThreads) {
3510   if (!CGF.HaveInsertPoint())
3511     return;
3512   // __kmpc_ordered(ident_t *, gtid);
3513   // OrderedOpGen();
3514   // __kmpc_end_ordered(ident_t *, gtid);
3515   // Prepare arguments and build a call to __kmpc_ordered
3516   if (IsThreads) {
3517     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3518     CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
3519                           createRuntimeFunction(OMPRTL__kmpc_end_ordered),
3520                           Args);
3521     OrderedOpGen.setAction(Action);
3522     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3523     return;
3524   }
3525   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3526 }
3527 
3528 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
3529   unsigned Flags;
3530   if (Kind == OMPD_for)
3531     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
3532   else if (Kind == OMPD_sections)
3533     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
3534   else if (Kind == OMPD_single)
3535     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
3536   else if (Kind == OMPD_barrier)
3537     Flags = OMP_IDENT_BARRIER_EXPL;
3538   else
3539     Flags = OMP_IDENT_BARRIER_IMPL;
3540   return Flags;
3541 }
3542 
3543 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
3544     CodeGenFunction &CGF, const OMPLoopDirective &S,
3545     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
3546   // Check if the loop directive is actually a doacross loop directive. In this
3547   // case choose static, 1 schedule.
3548   if (llvm::any_of(
3549           S.getClausesOfKind<OMPOrderedClause>(),
3550           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
3551     ScheduleKind = OMPC_SCHEDULE_static;
3552     // Chunk size is 1 in this case.
3553     llvm::APInt ChunkSize(32, 1);
3554     ChunkExpr = IntegerLiteral::Create(
3555         CGF.getContext(), ChunkSize,
3556         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
3557         SourceLocation());
3558   }
3559 }
3560 
3561 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
3562                                       OpenMPDirectiveKind Kind, bool EmitChecks,
3563                                       bool ForceSimpleCall) {
3564   // Check if we should use the OMPBuilder
3565   auto *OMPRegionInfo =
3566       dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
3567   llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
3568   if (OMPBuilder) {
3569     CGF.Builder.restoreIP(OMPBuilder->CreateBarrier(
3570         CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
3571     return;
3572   }
3573 
3574   if (!CGF.HaveInsertPoint())
3575     return;
3576   // Build call __kmpc_cancel_barrier(loc, thread_id);
3577   // Build call __kmpc_barrier(loc, thread_id);
3578   unsigned Flags = getDefaultFlagsForBarriers(Kind);
3579   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
3580   // thread_id);
3581   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
3582                          getThreadID(CGF, Loc)};
3583   if (OMPRegionInfo) {
3584     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
3585       llvm::Value *Result = CGF.EmitRuntimeCall(
3586           createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
3587       if (EmitChecks) {
3588         // if (__kmpc_cancel_barrier()) {
3589         //   exit from construct;
3590         // }
3591         llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
3592         llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
3593         llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
3594         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
3595         CGF.EmitBlock(ExitBB);
3596         //   exit from construct;
3597         CodeGenFunction::JumpDest CancelDestination =
3598             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
3599         CGF.EmitBranchThroughCleanup(CancelDestination);
3600         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
3601       }
3602       return;
3603     }
3604   }
3605   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
3606 }
3607 
3608 /// Map the OpenMP loop schedule to the runtime enumeration.
3609 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
3610                                           bool Chunked, bool Ordered) {
3611   switch (ScheduleKind) {
3612   case OMPC_SCHEDULE_static:
3613     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
3614                    : (Ordered ? OMP_ord_static : OMP_sch_static);
3615   case OMPC_SCHEDULE_dynamic:
3616     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
3617   case OMPC_SCHEDULE_guided:
3618     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
3619   case OMPC_SCHEDULE_runtime:
3620     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
3621   case OMPC_SCHEDULE_auto:
3622     return Ordered ? OMP_ord_auto : OMP_sch_auto;
3623   case OMPC_SCHEDULE_unknown:
3624     assert(!Chunked && "chunk was specified but schedule kind not known");
3625     return Ordered ? OMP_ord_static : OMP_sch_static;
3626   }
3627   llvm_unreachable("Unexpected runtime schedule");
3628 }
3629 
3630 /// Map the OpenMP distribute schedule to the runtime enumeration.
3631 static OpenMPSchedType
3632 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
3633   // only static is allowed for dist_schedule
3634   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
3635 }
3636 
3637 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
3638                                          bool Chunked) const {
3639   OpenMPSchedType Schedule =
3640       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3641   return Schedule == OMP_sch_static;
3642 }
3643 
3644 bool CGOpenMPRuntime::isStaticNonchunked(
3645     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3646   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3647   return Schedule == OMP_dist_sch_static;
3648 }
3649 
3650 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
3651                                       bool Chunked) const {
3652   OpenMPSchedType Schedule =
3653       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3654   return Schedule == OMP_sch_static_chunked;
3655 }
3656 
3657 bool CGOpenMPRuntime::isStaticChunked(
3658     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3659   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3660   return Schedule == OMP_dist_sch_static_chunked;
3661 }
3662 
3663 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
3664   OpenMPSchedType Schedule =
3665       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
3666   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
3667   return Schedule != OMP_sch_static;
3668 }
3669 
3670 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
3671                                   OpenMPScheduleClauseModifier M1,
3672                                   OpenMPScheduleClauseModifier M2) {
3673   int Modifier = 0;
3674   switch (M1) {
3675   case OMPC_SCHEDULE_MODIFIER_monotonic:
3676     Modifier = OMP_sch_modifier_monotonic;
3677     break;
3678   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3679     Modifier = OMP_sch_modifier_nonmonotonic;
3680     break;
3681   case OMPC_SCHEDULE_MODIFIER_simd:
3682     if (Schedule == OMP_sch_static_chunked)
3683       Schedule = OMP_sch_static_balanced_chunked;
3684     break;
3685   case OMPC_SCHEDULE_MODIFIER_last:
3686   case OMPC_SCHEDULE_MODIFIER_unknown:
3687     break;
3688   }
3689   switch (M2) {
3690   case OMPC_SCHEDULE_MODIFIER_monotonic:
3691     Modifier = OMP_sch_modifier_monotonic;
3692     break;
3693   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3694     Modifier = OMP_sch_modifier_nonmonotonic;
3695     break;
3696   case OMPC_SCHEDULE_MODIFIER_simd:
3697     if (Schedule == OMP_sch_static_chunked)
3698       Schedule = OMP_sch_static_balanced_chunked;
3699     break;
3700   case OMPC_SCHEDULE_MODIFIER_last:
3701   case OMPC_SCHEDULE_MODIFIER_unknown:
3702     break;
3703   }
3704   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
3705   // If the static schedule kind is specified or if the ordered clause is
3706   // specified, and if the nonmonotonic modifier is not specified, the effect is
3707   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
3708   // modifier is specified, the effect is as if the nonmonotonic modifier is
3709   // specified.
3710   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
3711     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
3712           Schedule == OMP_sch_static_balanced_chunked ||
3713           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
3714           Schedule == OMP_dist_sch_static_chunked ||
3715           Schedule == OMP_dist_sch_static))
3716       Modifier = OMP_sch_modifier_nonmonotonic;
3717   }
3718   return Schedule | Modifier;
3719 }
3720 
3721 void CGOpenMPRuntime::emitForDispatchInit(
3722     CodeGenFunction &CGF, SourceLocation Loc,
3723     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
3724     bool Ordered, const DispatchRTInput &DispatchValues) {
3725   if (!CGF.HaveInsertPoint())
3726     return;
3727   OpenMPSchedType Schedule = getRuntimeSchedule(
3728       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
3729   assert(Ordered ||
3730          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
3731           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
3732           Schedule != OMP_sch_static_balanced_chunked));
3733   // Call __kmpc_dispatch_init(
3734   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
3735   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
3736   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
3737 
3738   // If the Chunk was not specified in the clause - use default value 1.
3739   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
3740                                             : CGF.Builder.getIntN(IVSize, 1);
3741   llvm::Value *Args[] = {
3742       emitUpdateLocation(CGF, Loc),
3743       getThreadID(CGF, Loc),
3744       CGF.Builder.getInt32(addMonoNonMonoModifier(
3745           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
3746       DispatchValues.LB,                                     // Lower
3747       DispatchValues.UB,                                     // Upper
3748       CGF.Builder.getIntN(IVSize, 1),                        // Stride
3749       Chunk                                                  // Chunk
3750   };
3751   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
3752 }
3753 
3754 static void emitForStaticInitCall(
3755     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
3756     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
3757     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
3758     const CGOpenMPRuntime::StaticRTInput &Values) {
3759   if (!CGF.HaveInsertPoint())
3760     return;
3761 
3762   assert(!Values.Ordered);
3763   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
3764          Schedule == OMP_sch_static_balanced_chunked ||
3765          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
3766          Schedule == OMP_dist_sch_static ||
3767          Schedule == OMP_dist_sch_static_chunked);
3768 
3769   // Call __kmpc_for_static_init(
3770   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
3771   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
3772   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
3773   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
3774   llvm::Value *Chunk = Values.Chunk;
3775   if (Chunk == nullptr) {
3776     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
3777             Schedule == OMP_dist_sch_static) &&
3778            "expected static non-chunked schedule");
3779     // If the Chunk was not specified in the clause - use default value 1.
3780     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
3781   } else {
3782     assert((Schedule == OMP_sch_static_chunked ||
3783             Schedule == OMP_sch_static_balanced_chunked ||
3784             Schedule == OMP_ord_static_chunked ||
3785             Schedule == OMP_dist_sch_static_chunked) &&
3786            "expected static chunked schedule");
3787   }
3788   llvm::Value *Args[] = {
3789       UpdateLocation,
3790       ThreadId,
3791       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
3792                                                   M2)), // Schedule type
3793       Values.IL.getPointer(),                           // &isLastIter
3794       Values.LB.getPointer(),                           // &LB
3795       Values.UB.getPointer(),                           // &UB
3796       Values.ST.getPointer(),                           // &Stride
3797       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
3798       Chunk                                             // Chunk
3799   };
3800   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
3801 }
3802 
3803 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
3804                                         SourceLocation Loc,
3805                                         OpenMPDirectiveKind DKind,
3806                                         const OpenMPScheduleTy &ScheduleKind,
3807                                         const StaticRTInput &Values) {
3808   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
3809       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
3810   assert(isOpenMPWorksharingDirective(DKind) &&
3811          "Expected loop-based or sections-based directive.");
3812   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
3813                                              isOpenMPLoopDirective(DKind)
3814                                                  ? OMP_IDENT_WORK_LOOP
3815                                                  : OMP_IDENT_WORK_SECTIONS);
3816   llvm::Value *ThreadId = getThreadID(CGF, Loc);
3817   llvm::FunctionCallee StaticInitFunction =
3818       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3819   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
3820   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3821                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
3822 }
3823 
3824 void CGOpenMPRuntime::emitDistributeStaticInit(
3825     CodeGenFunction &CGF, SourceLocation Loc,
3826     OpenMPDistScheduleClauseKind SchedKind,
3827     const CGOpenMPRuntime::StaticRTInput &Values) {
3828   OpenMPSchedType ScheduleNum =
3829       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
3830   llvm::Value *UpdatedLocation =
3831       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
3832   llvm::Value *ThreadId = getThreadID(CGF, Loc);
3833   llvm::FunctionCallee StaticInitFunction =
3834       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3835   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3836                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
3837                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
3838 }
3839 
3840 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
3841                                           SourceLocation Loc,
3842                                           OpenMPDirectiveKind DKind) {
3843   if (!CGF.HaveInsertPoint())
3844     return;
3845   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
3846   llvm::Value *Args[] = {
3847       emitUpdateLocation(CGF, Loc,
3848                          isOpenMPDistributeDirective(DKind)
3849                              ? OMP_IDENT_WORK_DISTRIBUTE
3850                              : isOpenMPLoopDirective(DKind)
3851                                    ? OMP_IDENT_WORK_LOOP
3852                                    : OMP_IDENT_WORK_SECTIONS),
3853       getThreadID(CGF, Loc)};
3854   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
3855   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
3856                       Args);
3857 }
3858 
3859 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
3860                                                  SourceLocation Loc,
3861                                                  unsigned IVSize,
3862                                                  bool IVSigned) {
3863   if (!CGF.HaveInsertPoint())
3864     return;
3865   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
3866   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3867   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
3868 }
3869 
3870 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
3871                                           SourceLocation Loc, unsigned IVSize,
3872                                           bool IVSigned, Address IL,
3873                                           Address LB, Address UB,
3874                                           Address ST) {
3875   // Call __kmpc_dispatch_next(
3876   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
3877   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
3878   //          kmp_int[32|64] *p_stride);
3879   llvm::Value *Args[] = {
3880       emitUpdateLocation(CGF, Loc),
3881       getThreadID(CGF, Loc),
3882       IL.getPointer(), // &isLastIter
3883       LB.getPointer(), // &Lower
3884       UB.getPointer(), // &Upper
3885       ST.getPointer()  // &Stride
3886   };
3887   llvm::Value *Call =
3888       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
3889   return CGF.EmitScalarConversion(
3890       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
3891       CGF.getContext().BoolTy, Loc);
3892 }
3893 
3894 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
3895                                            llvm::Value *NumThreads,
3896                                            SourceLocation Loc) {
3897   if (!CGF.HaveInsertPoint())
3898     return;
3899   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
3900   llvm::Value *Args[] = {
3901       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3902       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
3903   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
3904                       Args);
3905 }
3906 
3907 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
3908                                          ProcBindKind ProcBind,
3909                                          SourceLocation Loc) {
3910   if (!CGF.HaveInsertPoint())
3911     return;
3912   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
3913   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
3914   llvm::Value *Args[] = {
3915       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3916       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
3917   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
3918 }
3919 
3920 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
3921                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
3922   llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
3923   if (OMPBuilder) {
3924     OMPBuilder->CreateFlush(CGF.Builder);
3925   } else {
3926     if (!CGF.HaveInsertPoint())
3927       return;
3928     // Build call void __kmpc_flush(ident_t *loc)
3929     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
3930                         emitUpdateLocation(CGF, Loc));
3931   }
3932 }
3933 
namespace {
/// Indexes of fields for type kmp_task_t.
///
/// No enumerator has an explicit value, so the fields are numbered
/// consecutively from zero in declaration order.
/// NOTE(review): presumably this order has to match the order in which the
/// kmp_task_t record is assembled elsewhere in this file — confirm before
/// inserting or reordering enumerators.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
3959 
3960 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3961   return OffloadEntriesTargetRegion.empty() &&
3962          OffloadEntriesDeviceGlobalVar.empty();
3963 }
3964 
3965 /// Initialize target region entry.
3966 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3967     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3968                                     StringRef ParentName, unsigned LineNum,
3969                                     unsigned Order) {
3970   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3971                                              "only required for the device "
3972                                              "code generation.");
3973   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3974       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3975                                    OMPTargetRegionEntryTargetRegion);
3976   ++OffloadingEntriesNum;
3977 }
3978 
3979 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3980     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3981                                   StringRef ParentName, unsigned LineNum,
3982                                   llvm::Constant *Addr, llvm::Constant *ID,
3983                                   OMPTargetRegionEntryKind Flags) {
3984   // If we are emitting code for a target, the entry is already initialized,
3985   // only has to be registered.
3986   if (CGM.getLangOpts().OpenMPIsDevice) {
3987     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
3988       unsigned DiagID = CGM.getDiags().getCustomDiagID(
3989           DiagnosticsEngine::Error,
3990           "Unable to find target region on line '%0' in the device code.");
3991       CGM.getDiags().Report(DiagID) << LineNum;
3992       return;
3993     }
3994     auto &Entry =
3995         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3996     assert(Entry.isValid() && "Entry not initialized!");
3997     Entry.setAddress(Addr);
3998     Entry.setID(ID);
3999     Entry.setFlags(Flags);
4000   } else {
4001     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
4002     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
4003     ++OffloadingEntriesNum;
4004   }
4005 }
4006 
4007 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
4008     unsigned DeviceID, unsigned FileID, StringRef ParentName,
4009     unsigned LineNum) const {
4010   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
4011   if (PerDevice == OffloadEntriesTargetRegion.end())
4012     return false;
4013   auto PerFile = PerDevice->second.find(FileID);
4014   if (PerFile == PerDevice->second.end())
4015     return false;
4016   auto PerParentName = PerFile->second.find(ParentName);
4017   if (PerParentName == PerFile->second.end())
4018     return false;
4019   auto PerLine = PerParentName->second.find(LineNum);
4020   if (PerLine == PerParentName->second.end())
4021     return false;
4022   // Fail if this entry is already registered.
4023   if (PerLine->second.getAddress() || PerLine->second.getID())
4024     return false;
4025   return true;
4026 }
4027 
4028 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
4029     const OffloadTargetRegionEntryInfoActTy &Action) {
4030   // Scan all target region entries and perform the provided action.
4031   for (const auto &D : OffloadEntriesTargetRegion)
4032     for (const auto &F : D.second)
4033       for (const auto &P : F.second)
4034         for (const auto &L : P.second)
4035           Action(D.first, F.first, P.first(), L.first, L.second);
4036 }
4037 
4038 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
4039     initializeDeviceGlobalVarEntryInfo(StringRef Name,
4040                                        OMPTargetGlobalVarEntryKind Flags,
4041                                        unsigned Order) {
4042   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
4043                                              "only required for the device "
4044                                              "code generation.");
4045   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
4046   ++OffloadingEntriesNum;
4047 }
4048 
4049 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
4050     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
4051                                      CharUnits VarSize,
4052                                      OMPTargetGlobalVarEntryKind Flags,
4053                                      llvm::GlobalValue::LinkageTypes Linkage) {
4054   if (CGM.getLangOpts().OpenMPIsDevice) {
4055     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
4056     assert(Entry.isValid() && Entry.getFlags() == Flags &&
4057            "Entry not initialized!");
4058     assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
4059            "Resetting with the new address.");
4060     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
4061       if (Entry.getVarSize().isZero()) {
4062         Entry.setVarSize(VarSize);
4063         Entry.setLinkage(Linkage);
4064       }
4065       return;
4066     }
4067     Entry.setVarSize(VarSize);
4068     Entry.setLinkage(Linkage);
4069     Entry.setAddress(Addr);
4070   } else {
4071     if (hasDeviceGlobalVarEntryInfo(VarName)) {
4072       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
4073       assert(Entry.isValid() && Entry.getFlags() == Flags &&
4074              "Entry not initialized!");
4075       assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
4076              "Resetting with the new address.");
4077       if (Entry.getVarSize().isZero()) {
4078         Entry.setVarSize(VarSize);
4079         Entry.setLinkage(Linkage);
4080       }
4081       return;
4082     }
4083     OffloadEntriesDeviceGlobalVar.try_emplace(
4084         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
4085     ++OffloadingEntriesNum;
4086   }
4087 }
4088 
4089 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
4090     actOnDeviceGlobalVarEntriesInfo(
4091         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
4092   // Scan all target region entries and perform the provided action.
4093   for (const auto &E : OffloadEntriesDeviceGlobalVar)
4094     Action(E.getKey(), E.getValue());
4095 }
4096 
4097 void CGOpenMPRuntime::createOffloadEntry(
4098     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
4099     llvm::GlobalValue::LinkageTypes Linkage) {
4100   StringRef Name = Addr->getName();
4101   llvm::Module &M = CGM.getModule();
4102   llvm::LLVMContext &C = M.getContext();
4103 
4104   // Create constant string with the name.
4105   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
4106 
4107   std::string StringName = getName({"omp_offloading", "entry_name"});
4108   auto *Str = new llvm::GlobalVariable(
4109       M, StrPtrInit->getType(), /*isConstant=*/true,
4110       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
4111   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
4112 
4113   llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
4114                             llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
4115                             llvm::ConstantInt::get(CGM.SizeTy, Size),
4116                             llvm::ConstantInt::get(CGM.Int32Ty, Flags),
4117                             llvm::ConstantInt::get(CGM.Int32Ty, 0)};
4118   std::string EntryName = getName({"omp_offloading", "entry", ""});
4119   llvm::GlobalVariable *Entry = createGlobalStruct(
4120       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
4121       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
4122 
4123   // The entry has to be created in the section the linker expects it to be.
4124   Entry->setSection("omp_offloading_entries");
4125 }
4126 
/// Emit the offload entries for all registered target regions and declare
/// target variables, and describe every one of them in the
/// "omp_offload.info" named metadata of the module. Does nothing in simd mode
/// or when no entries are registered.
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Metadata is generated for target region entries and for device global
  // variable entries.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries indexed by their order, each with the source location and the
  // name used when reporting problems.
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  // Parent function names of target region entries, indexed by entry order.
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry.
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Find the file with the matching unique ID to build a source location
        // for diagnostics; the location stays invalid if no file matches.
        // Note that the end iterator declared below is also named E and hides
        // the entry parameter for the duration of the loop.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry.
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Emit the actual offload entries, following the order of creation.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        // No entry is emitted on the device when unified shared memory is
        // required.
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        // The address is expected to be set on the host and unset on the
        // device.
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        // Link entries are only emitted on the host.
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      // For variables the address serves both as the ID and as the address.
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
4300 
4301 /// Loads all the offload entries information from the host IR
4302 /// metadata.
4303 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
4304   // If we are in target mode, load the metadata from the host IR. This code has
4305   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
4306 
4307   if (!CGM.getLangOpts().OpenMPIsDevice)
4308     return;
4309 
4310   if (CGM.getLangOpts().OMPHostIRFile.empty())
4311     return;
4312 
4313   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
4314   if (auto EC = Buf.getError()) {
4315     CGM.getDiags().Report(diag::err_cannot_open_file)
4316         << CGM.getLangOpts().OMPHostIRFile << EC.message();
4317     return;
4318   }
4319 
4320   llvm::LLVMContext C;
4321   auto ME = expectedToErrorOrAndEmitErrors(
4322       C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
4323 
4324   if (auto EC = ME.getError()) {
4325     unsigned DiagID = CGM.getDiags().getCustomDiagID(
4326         DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
4327     CGM.getDiags().Report(DiagID)
4328         << CGM.getLangOpts().OMPHostIRFile << EC.message();
4329     return;
4330   }
4331 
4332   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
4333   if (!MD)
4334     return;
4335 
4336   for (llvm::MDNode *MN : MD->operands()) {
4337     auto &&GetMDInt = [MN](unsigned Idx) {
4338       auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
4339       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
4340     };
4341 
4342     auto &&GetMDString = [MN](unsigned Idx) {
4343       auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
4344       return V->getString();
4345     };
4346 
4347     switch (GetMDInt(0)) {
4348     default:
4349       llvm_unreachable("Unexpected metadata!");
4350       break;
4351     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
4352         OffloadingEntryInfoTargetRegion:
4353       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
4354           /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
4355           /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
4356           /*Order=*/GetMDInt(5));
4357       break;
4358     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
4359         OffloadingEntryInfoDeviceGlobalVar:
4360       OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
4361           /*MangledName=*/GetMDString(1),
4362           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
4363               /*Flags=*/GetMDInt(2)),
4364           /*Order=*/GetMDInt(3));
4365       break;
4366     }
4367   }
4368 }
4369 
4370 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
4371   if (!KmpRoutineEntryPtrTy) {
4372     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
4373     ASTContext &C = CGM.getContext();
4374     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
4375     FunctionProtoType::ExtProtoInfo EPI;
4376     KmpRoutineEntryPtrQTy = C.getPointerType(
4377         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
4378     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
4379   }
4380 }
4381 
4382 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
4383   // Make sure the type of the entry is already created. This is the type we
4384   // have to create:
4385   // struct __tgt_offload_entry{
4386   //   void      *addr;       // Pointer to the offload entry info.
4387   //                          // (function or global)
4388   //   char      *name;       // Name of the function or global.
4389   //   size_t     size;       // Size of the entry info (0 if it a function).
4390   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
4391   //   int32_t    reserved;   // Reserved, to use by the runtime library.
4392   // };
4393   if (TgtOffloadEntryQTy.isNull()) {
4394     ASTContext &C = CGM.getContext();
4395     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
4396     RD->startDefinition();
4397     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4398     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
4399     addFieldToRecordDecl(C, RD, C.getSizeType());
4400     addFieldToRecordDecl(
4401         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4402     addFieldToRecordDecl(
4403         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4404     RD->completeDefinition();
4405     RD->addAttr(PackedAttr::CreateImplicit(C));
4406     TgtOffloadEntryQTy = C.getRecordType(RD);
4407   }
4408   return TgtOffloadEntryQTy;
4409 }
4410 
4411 namespace {
4412 struct PrivateHelpersTy {
4413   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
4414                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
4415       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
4416         PrivateElemInit(PrivateElemInit) {}
4417   const Expr *OriginalRef = nullptr;
4418   const VarDecl *Original = nullptr;
4419   const VarDecl *PrivateCopy = nullptr;
4420   const VarDecl *PrivateElemInit = nullptr;
4421 };
4422 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
4423 } // anonymous namespace
4424 
4425 static RecordDecl *
4426 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
4427   if (!Privates.empty()) {
4428     ASTContext &C = CGM.getContext();
4429     // Build struct .kmp_privates_t. {
4430     //         /*  private vars  */
4431     //       };
4432     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
4433     RD->startDefinition();
4434     for (const auto &Pair : Privates) {
4435       const VarDecl *VD = Pair.second.Original;
4436       QualType Type = VD->getType().getNonReferenceType();
4437       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
4438       if (VD->hasAttrs()) {
4439         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
4440              E(VD->getAttrs().end());
4441              I != E; ++I)
4442           FD->addAttr(*I);
4443       }
4444     }
4445     RD->completeDefinition();
4446     return RD;
4447   }
4448   return nullptr;
4449 }
4450 
4451 static RecordDecl *
4452 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
4453                          QualType KmpInt32Ty,
4454                          QualType KmpRoutineEntryPointerQTy) {
4455   ASTContext &C = CGM.getContext();
4456   // Build struct kmp_task_t {
4457   //         void *              shareds;
4458   //         kmp_routine_entry_t routine;
4459   //         kmp_int32           part_id;
4460   //         kmp_cmplrdata_t data1;
4461   //         kmp_cmplrdata_t data2;
4462   // For taskloops additional fields:
4463   //         kmp_uint64          lb;
4464   //         kmp_uint64          ub;
4465   //         kmp_int64           st;
4466   //         kmp_int32           liter;
4467   //         void *              reductions;
4468   //       };
4469   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
4470   UD->startDefinition();
4471   addFieldToRecordDecl(C, UD, KmpInt32Ty);
4472   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
4473   UD->completeDefinition();
4474   QualType KmpCmplrdataTy = C.getRecordType(UD);
4475   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
4476   RD->startDefinition();
4477   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4478   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
4479   addFieldToRecordDecl(C, RD, KmpInt32Ty);
4480   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4481   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4482   if (isOpenMPTaskLoopDirective(Kind)) {
4483     QualType KmpUInt64Ty =
4484         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
4485     QualType KmpInt64Ty =
4486         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
4487     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4488     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4489     addFieldToRecordDecl(C, RD, KmpInt64Ty);
4490     addFieldToRecordDecl(C, RD, KmpInt32Ty);
4491     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4492   }
4493   RD->completeDefinition();
4494   return RD;
4495 }
4496 
4497 static RecordDecl *
4498 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
4499                                      ArrayRef<PrivateDataTy> Privates) {
4500   ASTContext &C = CGM.getContext();
4501   // Build struct kmp_task_t_with_privates {
4502   //         kmp_task_t task_data;
4503   //         .kmp_privates_t. privates;
4504   //       };
4505   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
4506   RD->startDefinition();
4507   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
4508   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
4509     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
4510   RD->completeDefinition();
4511   return RD;
4512 }
4513 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, &tt->task_data.part_id, &tt->privates,
///   task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->task_data.reductions,
///   tt->task_data.shareds);
///   return 0;
/// }
/// \endcode
/// A null pointer is passed instead of &tt->privates if the record has no
/// privates field. The function is created with internal linkage and marked
/// as non-recursive.
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Parameters of the proxy: the global thread id and a restrict-qualified
  // pointer to the task record.
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, &tt->task_data.part_id, &tt->privates,
  // task_privates_map, tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.reductions,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // TDBase is the whole kmp_task_t_with_privates record, Base is its first
  // field.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // The part id is passed by address, not by value.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  // The shareds pointer is loaded and cast to the expected pointer type.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates are the optional second field of the wrapper record; pass a
  // null pointer if the field does not exist.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  // Arguments passed for every kind of task; the last one is the task record
  // itself as a void pointer.
  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  // Taskloops additionally receive the values of the lower bound, upper
  // bound, stride, last iteration flag and reductions fields.
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  // The shareds pointer is always the last argument.
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The proxy always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
4628 
/// Emit a function that destroys the private variables of a task.
///
/// The function takes the global thread id and a pointer to the task record
/// with privates, and pushes a destroy cleanup for every field of the
/// privates record whose type needs destruction. It is created with internal
/// linkage and marked as non-recursive.
///
/// NOTE(review): the second field of the task record is accessed without
/// checking that it exists; presumably callers only request this function
/// when the record has a privates field - confirm against the caller.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  // Parameters: the global thread id and a restrict-qualified pointer to the
  // task record.
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Step to the second field of the record, i.e. the privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  // Push a destroy cleanup for every private field that needs destruction.
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
4677 
4678 /// Emit a privates mapping function for correct handling of private and
4679 /// firstprivate variables.
4680 /// \code
4681 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
4682 /// **noalias priv1,...,  <tyn> **noalias privn) {
4683 ///   *priv1 = &.privates.priv1;
4684 ///   ...;
4685 ///   *privn = &.privates.privn;
4686 /// }
4687 /// \endcode
4688 static llvm::Value *
4689 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
4690                                ArrayRef<const Expr *> PrivateVars,
4691                                ArrayRef<const Expr *> FirstprivateVars,
4692                                ArrayRef<const Expr *> LastprivateVars,
4693                                QualType PrivatesQTy,
4694                                ArrayRef<PrivateDataTy> Privates) {
4695   ASTContext &C = CGM.getContext();
4696   FunctionArgList Args;
4697   ImplicitParamDecl TaskPrivatesArg(
4698       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4699       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
4700       ImplicitParamDecl::Other);
4701   Args.push_back(&TaskPrivatesArg);
4702   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
4703   unsigned Counter = 1;
4704   for (const Expr *E : PrivateVars) {
4705     Args.push_back(ImplicitParamDecl::Create(
4706         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4707         C.getPointerType(C.getPointerType(E->getType()))
4708             .withConst()
4709             .withRestrict(),
4710         ImplicitParamDecl::Other));
4711     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4712     PrivateVarsPos[VD] = Counter;
4713     ++Counter;
4714   }
4715   for (const Expr *E : FirstprivateVars) {
4716     Args.push_back(ImplicitParamDecl::Create(
4717         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4718         C.getPointerType(C.getPointerType(E->getType()))
4719             .withConst()
4720             .withRestrict(),
4721         ImplicitParamDecl::Other));
4722     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4723     PrivateVarsPos[VD] = Counter;
4724     ++Counter;
4725   }
4726   for (const Expr *E : LastprivateVars) {
4727     Args.push_back(ImplicitParamDecl::Create(
4728         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4729         C.getPointerType(C.getPointerType(E->getType()))
4730             .withConst()
4731             .withRestrict(),
4732         ImplicitParamDecl::Other));
4733     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4734     PrivateVarsPos[VD] = Counter;
4735     ++Counter;
4736   }
4737   const auto &TaskPrivatesMapFnInfo =
4738       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4739   llvm::FunctionType *TaskPrivatesMapTy =
4740       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
4741   std::string Name =
4742       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
4743   auto *TaskPrivatesMap = llvm::Function::Create(
4744       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
4745       &CGM.getModule());
4746   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
4747                                     TaskPrivatesMapFnInfo);
4748   if (CGM.getLangOpts().Optimize) {
4749     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
4750     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
4751     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
4752   }
4753   CodeGenFunction CGF(CGM);
4754   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
4755                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
4756 
4757   // *privi = &.privates.privi;
4758   LValue Base = CGF.EmitLoadOfPointerLValue(
4759       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
4760       TaskPrivatesArg.getType()->castAs<PointerType>());
4761   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
4762   Counter = 0;
4763   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
4764     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
4765     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
4766     LValue RefLVal =
4767         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
4768     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
4769         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
4770     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
4771     ++Counter;
4772   }
4773   CGF.FinishFunction();
4774   return TaskPrivatesMap;
4775 }
4776 
/// Emit initialization for private variables in task-based directives.
///
/// Walks \p Privates in lockstep with the fields of the privates record (the
/// second field of \p KmpTaskTWithPrivatesQTyRD) and emits the initializer of
/// each private copy into the corresponding field.
///
/// \param CGF Function in which the initialization code is emitted.
/// \param D The task-based (task, taskloop or target) directive.
/// \param KmpTaskSharedsPtr Address of the shareds block; may be invalid.
/// \param TDBase LValue of the task-with-privates record being initialized.
/// \param KmpTaskTWithPrivatesQTyRD Record declaration of that record.
/// \param SharedsTy Type of the shareds record.
/// \param SharedsPtrTy Pointer type to \p SharedsTy.
/// \param Data Task data; only its firstprivate list is inspected here.
/// \param Privates Private copies, in the same order as the record fields.
/// \param ForDup True when called from the task duplication function; then
/// only non-trivial constructor initializers are emitted and firstprivate
/// sources are read through \p KmpTaskSharedsPtr.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates record is the second field of the task-with-privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  // Select the captured statement that belongs to the task region.
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  // Base lvalue of the shareds record; only set up when it is needed below.
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // From here on FI iterates over the fields of the privates record itself.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // Outside of the dup function every initializer is emitted; inside it only
    // non-trivial constructor calls are.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // A non-null PrivateElemInit means the initializer reads from the
      // original variable, so its address has to be located first.
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          // Not captured: use the address of the local variable directly.
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // In the dup function the source is the captured field of the
          // shareds record, re-wrapped with the declared alignment of the
          // original variable.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
          // Lambda capture field or code inside a block: emit the original
          // reference as is.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  // Redirect the helper element variable to the current
                  // source element.
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array: redirect the helper variable to the original's address
          // and emit the initializer into the private field.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // The initializer does not depend on the original variable.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    // Advance to the next field even if nothing was emitted for this one.
    ++FI;
  }
}
4893 
4894 /// Check if duplication function is required for taskloops.
4895 static bool checkInitIsRequired(CodeGenFunction &CGF,
4896                                 ArrayRef<PrivateDataTy> Privates) {
4897   bool InitRequired = false;
4898   for (const PrivateDataTy &Pair : Privates) {
4899     const VarDecl *VD = Pair.second.PrivateCopy;
4900     const Expr *Init = VD->getAnyInitializer();
4901     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
4902                                     !CGF.isTrivialInitializer(Init));
4903     if (InitRequired)
4904       break;
4905   }
4906   return InitRequired;
4907 }
4908 
4909 
4910 /// Emit task_dup function (for initialization of
4911 /// private/firstprivate/lastprivate vars and last_iter flag)
4912 /// \code
4913 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
4914 /// lastpriv) {
4915 /// // setup lastprivate flag
4916 ///    task_dst->last = lastpriv;
4917 /// // could be constructor calls here...
4918 /// }
4919 /// \endcode
4920 static llvm::Value *
4921 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
4922                     const OMPExecutableDirective &D,
4923                     QualType KmpTaskTWithPrivatesPtrQTy,
4924                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4925                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
4926                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
4927                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
4928   ASTContext &C = CGM.getContext();
4929   FunctionArgList Args;
4930   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4931                            KmpTaskTWithPrivatesPtrQTy,
4932                            ImplicitParamDecl::Other);
4933   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4934                            KmpTaskTWithPrivatesPtrQTy,
4935                            ImplicitParamDecl::Other);
4936   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
4937                                 ImplicitParamDecl::Other);
4938   Args.push_back(&DstArg);
4939   Args.push_back(&SrcArg);
4940   Args.push_back(&LastprivArg);
4941   const auto &TaskDupFnInfo =
4942       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4943   llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
4944   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
4945   auto *TaskDup = llvm::Function::Create(
4946       TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4947   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
4948   TaskDup->setDoesNotRecurse();
4949   CodeGenFunction CGF(CGM);
4950   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
4951                     Loc);
4952 
4953   LValue TDBase = CGF.EmitLoadOfPointerLValue(
4954       CGF.GetAddrOfLocalVar(&DstArg),
4955       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4956   // task_dst->liter = lastpriv;
4957   if (WithLastIter) {
4958     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4959     LValue Base = CGF.EmitLValueForField(
4960         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4961     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4962     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
4963         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
4964     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
4965   }
4966 
4967   // Emit initial values for private copies (if any).
4968   assert(!Privates.empty());
4969   Address KmpTaskSharedsPtr = Address::invalid();
4970   if (!Data.FirstprivateVars.empty()) {
4971     LValue TDBase = CGF.EmitLoadOfPointerLValue(
4972         CGF.GetAddrOfLocalVar(&SrcArg),
4973         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4974     LValue Base = CGF.EmitLValueForField(
4975         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4976     KmpTaskSharedsPtr = Address(
4977         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
4978                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
4979                                                   KmpTaskTShareds)),
4980                              Loc),
4981         CGM.getNaturalTypeAlignment(SharedsTy));
4982   }
4983   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
4984                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
4985   CGF.FinishFunction();
4986   return TaskDup;
4987 }
4988 
4989 /// Checks if destructor function is required to be generated.
4990 /// \return true if cleanups are required, false otherwise.
4991 static bool
4992 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
4993   bool NeedsCleanup = false;
4994   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4995   const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
4996   for (const FieldDecl *FD : PrivateRD->fields()) {
4997     NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
4998     if (NeedsCleanup)
4999       break;
5000   }
5001   return NeedsCleanup;
5002 }
5003 
/// Emit the code that allocates and initializes a task descriptor for the
/// task-based directive \p D.
///
/// The function collects all private copies, builds the task-with-privates
/// record type, emits the privates mapping function and the proxy task entry,
/// calls the runtime allocation routine, copies the shareds, initializes the
/// privates and fills in the destructor/priority slots of the descriptor.
///
/// \param CGF Function in which the code is emitted.
/// \param Loc Source location used for the emitted code.
/// \param D Task, taskloop or target directive.
/// \param TaskFunction Outlined task body; the type of its fourth argument is
/// used as the type of the privates mapping function pointer.
/// \param SharedsTy Record type that holds the shared variables.
/// \param Shareds Address of the shareds record to copy into the task.
/// \param Data Clause data (privates, tied/final/priority information).
/// \return The allocated task, the task entry, the typed task pointer, the
/// lvalue of the kmp_task_t part, the kmp_task_t record declaration and, for
/// taskloops that need it, the task duplication function.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  // Private variables: no element initializer helper.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivate variables: also record the helper variable used by the
  // initializer to refer to the original.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  // Lastprivate variables: no element initializer helper.
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Largest alignment first; the stable sort keeps the relative order of
  // entries with equal alignment.
  llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
    return L.first > R.first;
  });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  // Taskloop directives and all other directives use separately cached types.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit the privates mapping function (if there are any privates); otherwise
  // pass a null pointer of the type expected by the task function.
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  // Flags known at compile time are accumulated in 'Flags'.
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // The final flag is selected at run time when Data.Final carries a value,
  // otherwise it is taken from the constant stored in Data.Final.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  // Directives with a 'nowait' clause use the target task allocation entry,
  // which takes the device id as an additional trailing argument.
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_target_task_alloc), AllocArgs);
  } else {
    NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    // Note: this 'Loc' is a local that shadows the function parameter.
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    // Convert the returned pointer to the type of the event handler variable
    // and store it there.
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // View the allocated task as a pointer to the task-with-privates record.
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  // TDBase is the kmp_task_t part, i.e. the first field of the record.
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    // The destination is the shareds pointer loaded from the task descriptor.
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops need a duplication function when they have lastprivates or
    // privates with non-trivial constructor initializers.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    // The destructor function pointer goes into the 'Destructors' member of
    // the Data1 field.
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  // The priority value goes into the 'Priority' member of the Data2 field.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
5245 
namespace {
/// Dependence kind values passed to the runtime library.
enum RTLDependenceKindTy {
  /// 'in' dependence.
  DepIn = 0x1,
  /// 'out' and 'inout' dependences.
  DepInOut = 0x3,
  /// 'mutexinoutset' dependence.
  DepMutexInOutSet = 0x4
};
/// Indices of the fields in the kmp_depend_info record.
enum RTLDependInfoFieldsTy {
  /// Base address field.
  BaseAddr,
  /// Length field.
  Len,
  /// Flags field.
  Flags
};
} // namespace
5256 
5257 /// Translates internal dependency kind into the runtime kind.
5258 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
5259   RTLDependenceKindTy DepKind;
5260   switch (K) {
5261   case OMPC_DEPEND_in:
5262     DepKind = DepIn;
5263     break;
5264   // Out and InOut dependencies must use the same code.
5265   case OMPC_DEPEND_out:
5266   case OMPC_DEPEND_inout:
5267     DepKind = DepInOut;
5268     break;
5269   case OMPC_DEPEND_mutexinoutset:
5270     DepKind = DepMutexInOutSet;
5271     break;
5272   case OMPC_DEPEND_source:
5273   case OMPC_DEPEND_sink:
5274   case OMPC_DEPEND_depobj:
5275   case OMPC_DEPEND_unknown:
5276     llvm_unreachable("Unknown task dependence type");
5277   }
5278   return DepKind;
5279 }
5280 
5281 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
5282 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
5283                            QualType &FlagsTy) {
5284   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
5285   if (KmpDependInfoTy.isNull()) {
5286     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
5287     KmpDependInfoRD->startDefinition();
5288     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
5289     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
5290     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
5291     KmpDependInfoRD->completeDefinition();
5292     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
5293   }
5294 }
5295 
5296 std::pair<llvm::Value *, LValue>
5297 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
5298                                    SourceLocation Loc) {
5299   ASTContext &C = CGM.getContext();
5300   QualType FlagsTy;
5301   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5302   RecordDecl *KmpDependInfoRD =
5303       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5304   LValue Base = CGF.EmitLoadOfPointerLValue(
5305       DepobjLVal.getAddress(CGF),
5306       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5307   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5308   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5309           Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
5310   Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
5311                             Base.getTBAAInfo());
5312   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5313       Addr.getPointer(),
5314       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5315   LValue NumDepsBase = CGF.MakeAddrLValue(
5316       Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
5317       Base.getBaseInfo(), Base.getTBAAInfo());
5318   // NumDeps = deps[i].base_addr;
5319   LValue BaseAddrLVal = CGF.EmitLValueForField(
5320       NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
5321   llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
5322   return std::make_pair(NumDeps, Base);
5323 }
5324 
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// The constructor emits the head of a loop nest with one loop per iterator
/// in the expression (outermost iterator first) and leaves the insertion
/// point inside the innermost body. The destructor emits the matching loop
/// tails in reverse order. Code emitted while the object is alive therefore
/// runs once per point of the iteration space. When the iterator expression
/// is null, neither the constructor nor the destructor emits anything.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  /// Iterator expression being expanded; null means "no loops".
  const OMPIteratorExpr *E = nullptr;
  /// Per-iterator jump destinations of the loop condition blocks
  /// ("iter.cont"), indexed by iterator number.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  /// Per-iterator jump destinations of the loop exit blocks ("iter.exit"),
  /// indexed by iterator number.
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  /// Emits the loop heads for all iterators of \p E (if any).
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    // First pass: evaluate every upper bound and register private storage
    // for each iterator variable and its helper counter. All upper bounds are
    // emitted here, before Privatize() is called.
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    // Second pass: emit the loop head of each iterator, outermost first.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      // Remember the condition and exit destinations so that the destructor
      // can branch back to / emit them.
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // The comparison signedness follows the type of the counter variable.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  /// Emits the loop tails, innermost iterator first, closing the loops opened
  /// by the constructor.
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      // Only the exit block of the outermost loop (I == 1) is emitted as
      // finished.
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
5403 
5404 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
5405                            llvm::PointerUnion<unsigned *, LValue *> Pos,
5406                            const OMPTaskDataTy::DependData &Data,
5407                            Address DependenciesArray) {
5408   CodeGenModule &CGM = CGF.CGM;
5409   ASTContext &C = CGM.getContext();
5410   QualType FlagsTy;
5411   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5412   RecordDecl *KmpDependInfoRD =
5413       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5414   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5415 
5416   OMPIteratorGeneratorScope IteratorScope(
5417       CGF, cast_or_null<OMPIteratorExpr>(
5418                Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
5419                                  : nullptr));
5420   for (const Expr *E : Data.DepExprs) {
5421     const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
5422     llvm::Value *Addr;
5423     if (OASE) {
5424       const Expr *Base = OASE->getBase();
5425       Addr = CGF.EmitScalarExpr(Base);
5426     } else {
5427       Addr = CGF.EmitLValue(E).getPointer(CGF);
5428     }
5429     llvm::Value *Size;
5430     QualType Ty = E->getType();
5431     if (OASE) {
5432       Size = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
5433       for (const Expr *SE : OASE->getDimensions()) {
5434         llvm::Value *Sz = CGF.EmitScalarExpr(SE);
5435         Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
5436                                       CGF.getContext().getSizeType(),
5437                                       SE->getExprLoc());
5438         Size = CGF.Builder.CreateNUWMul(Size, Sz);
5439       }
5440     } else if (const auto *ASE =
5441                    dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
5442       LValue UpAddrLVal =
5443           CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
5444       llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
5445           UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
5446       llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGM.SizeTy);
5447       llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
5448       Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
5449     } else {
5450       Size = CGF.getTypeSize(Ty);
5451     }
5452     LValue Base;
5453     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
5454       Base = CGF.MakeAddrLValue(
5455           CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
5456     } else {
5457       LValue &PosLVal = *Pos.get<LValue *>();
5458       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
5459       Base = CGF.MakeAddrLValue(
5460           Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
5461                   DependenciesArray.getAlignment()),
5462           KmpDependInfoTy);
5463     }
5464     // deps[i].base_addr = &<Dependencies[i].second>;
5465     LValue BaseAddrLVal = CGF.EmitLValueForField(
5466         Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
5467     CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
5468                           BaseAddrLVal);
5469     // deps[i].len = sizeof(<Dependencies[i].second>);
5470     LValue LenLVal = CGF.EmitLValueForField(
5471         Base, *std::next(KmpDependInfoRD->field_begin(), Len));
5472     CGF.EmitStoreOfScalar(Size, LenLVal);
5473     // deps[i].flags = <Dependencies[i].first>;
5474     RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
5475     LValue FlagsLVal = CGF.EmitLValueForField(
5476         Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5477     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5478                           FlagsLVal);
5479     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
5480       ++(*P);
5481     } else {
5482       LValue &PosLVal = *Pos.get<LValue *>();
5483       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
5484       Idx = CGF.Builder.CreateNUWAdd(Idx,
5485                                      llvm::ConstantInt::get(Idx->getType(), 1));
5486       CGF.EmitStoreOfScalar(Idx, PosLVal);
5487     }
5488   }
5489 }
5490 
5491 static SmallVector<llvm::Value *, 4>
5492 emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
5493                         const OMPTaskDataTy::DependData &Data) {
5494   assert(Data.DepKind == OMPC_DEPEND_depobj &&
5495          "Expected depobj dependecy kind.");
5496   SmallVector<llvm::Value *, 4> Sizes;
5497   SmallVector<LValue, 4> SizeLVals;
5498   ASTContext &C = CGF.getContext();
5499   QualType FlagsTy;
5500   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5501   RecordDecl *KmpDependInfoRD =
5502       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5503   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5504   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
5505   {
5506     OMPIteratorGeneratorScope IteratorScope(
5507         CGF, cast_or_null<OMPIteratorExpr>(
5508                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
5509                                    : nullptr));
5510     for (const Expr *E : Data.DepExprs) {
5511       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
5512       LValue Base = CGF.EmitLoadOfPointerLValue(
5513           DepobjLVal.getAddress(CGF),
5514           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5515       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5516           Base.getAddress(CGF), KmpDependInfoPtrT);
5517       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
5518                                 Base.getTBAAInfo());
5519       llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5520           Addr.getPointer(),
5521           llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5522       LValue NumDepsBase = CGF.MakeAddrLValue(
5523           Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
5524           Base.getBaseInfo(), Base.getTBAAInfo());
5525       // NumDeps = deps[i].base_addr;
5526       LValue BaseAddrLVal = CGF.EmitLValueForField(
5527           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
5528       llvm::Value *NumDeps =
5529           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
5530       LValue NumLVal = CGF.MakeAddrLValue(
5531           CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
5532           C.getUIntPtrType());
5533       CGF.InitTempAlloca(NumLVal.getAddress(CGF),
5534                          llvm::ConstantInt::get(CGF.IntPtrTy, 0));
5535       llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
5536       llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
5537       CGF.EmitStoreOfScalar(Add, NumLVal);
5538       SizeLVals.push_back(NumLVal);
5539     }
5540   }
5541   for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
5542     llvm::Value *Size =
5543         CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
5544     Sizes.push_back(Size);
5545   }
5546   return Sizes;
5547 }
5548 
5549 static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
5550                                LValue PosLVal,
5551                                const OMPTaskDataTy::DependData &Data,
5552                                Address DependenciesArray) {
5553   assert(Data.DepKind == OMPC_DEPEND_depobj &&
5554          "Expected depobj dependecy kind.");
5555   ASTContext &C = CGF.getContext();
5556   QualType FlagsTy;
5557   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5558   RecordDecl *KmpDependInfoRD =
5559       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5560   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5561   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
5562   llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
5563   {
5564     OMPIteratorGeneratorScope IteratorScope(
5565         CGF, cast_or_null<OMPIteratorExpr>(
5566                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
5567                                    : nullptr));
5568     for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
5569       const Expr *E = Data.DepExprs[I];
5570       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
5571       LValue Base = CGF.EmitLoadOfPointerLValue(
5572           DepobjLVal.getAddress(CGF),
5573           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5574       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5575           Base.getAddress(CGF), KmpDependInfoPtrT);
5576       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
5577                                 Base.getTBAAInfo());
5578 
5579       // Get number of elements in a single depobj.
5580       llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5581           Addr.getPointer(),
5582           llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5583       LValue NumDepsBase = CGF.MakeAddrLValue(
5584           Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
5585           Base.getBaseInfo(), Base.getTBAAInfo());
5586       // NumDeps = deps[i].base_addr;
5587       LValue BaseAddrLVal = CGF.EmitLValueForField(
5588           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
5589       llvm::Value *NumDeps =
5590           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
5591 
5592       // memcopy dependency data.
5593       llvm::Value *Size = CGF.Builder.CreateNUWMul(
5594           ElSize,
5595           CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
5596       llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
5597       Address DepAddr =
5598           Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
5599                   DependenciesArray.getAlignment());
5600       CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
5601 
5602       // Increase pos.
5603       // pos += size;
5604       llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
5605       CGF.EmitStoreOfScalar(Add, PosLVal);
5606     }
5607   }
5608 }
5609 
5610 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
5611     CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
5612     SourceLocation Loc) {
5613   if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
5614         return D.DepExprs.empty();
5615       }))
5616     return std::make_pair(nullptr, Address::invalid());
5617   // Process list of dependencies.
5618   ASTContext &C = CGM.getContext();
5619   Address DependenciesArray = Address::invalid();
5620   llvm::Value *NumOfElements = nullptr;
5621   unsigned NumDependencies = std::accumulate(
5622       Dependencies.begin(), Dependencies.end(), 0,
5623       [](unsigned V, const OMPTaskDataTy::DependData &D) {
5624         return D.DepKind == OMPC_DEPEND_depobj
5625                    ? V
5626                    : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
5627       });
5628   QualType FlagsTy;
5629   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5630   bool HasDepobjDeps = false;
5631   bool HasRegularWithIterators = false;
5632   llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
5633   llvm::Value *NumOfRegularWithIterators =
5634       llvm::ConstantInt::get(CGF.IntPtrTy, 1);
5635   // Calculate number of depobj dependecies and regular deps with the iterators.
5636   for (const OMPTaskDataTy::DependData &D : Dependencies) {
5637     if (D.DepKind == OMPC_DEPEND_depobj) {
5638       SmallVector<llvm::Value *, 4> Sizes =
5639           emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
5640       for (llvm::Value *Size : Sizes) {
5641         NumOfDepobjElements =
5642             CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
5643       }
5644       HasDepobjDeps = true;
5645       continue;
5646     }
5647     // Include number of iterations, if any.
5648     if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
5649       for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
5650         llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
5651         Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
5652         NumOfRegularWithIterators =
5653             CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
5654       }
5655       HasRegularWithIterators = true;
5656       continue;
5657     }
5658   }
5659 
5660   QualType KmpDependInfoArrayTy;
5661   if (HasDepobjDeps || HasRegularWithIterators) {
5662     NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
5663                                            /*isSigned=*/false);
5664     if (HasDepobjDeps) {
5665       NumOfElements =
5666           CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
5667     }
5668     if (HasRegularWithIterators) {
5669       NumOfElements =
5670           CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
5671     }
5672     OpaqueValueExpr OVE(Loc,
5673                         C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
5674                         VK_RValue);
5675     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
5676                                                   RValue::get(NumOfElements));
5677     KmpDependInfoArrayTy =
5678         C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
5679                                /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
5680     // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
5681     // Properly emit variable-sized array.
5682     auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
5683                                          ImplicitParamDecl::Other);
5684     CGF.EmitVarDecl(*PD);
5685     DependenciesArray = CGF.GetAddrOfLocalVar(PD);
5686     NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
5687                                               /*isSigned=*/false);
5688   } else {
5689     KmpDependInfoArrayTy = C.getConstantArrayType(
5690         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
5691         ArrayType::Normal, /*IndexTypeQuals=*/0);
5692     DependenciesArray =
5693         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
5694     DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
5695     NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
5696                                            /*isSigned=*/false);
5697   }
5698   unsigned Pos = 0;
5699   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
5700     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
5701         Dependencies[I].IteratorExpr)
5702       continue;
5703     emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
5704                    DependenciesArray);
5705   }
5706   // Copy regular dependecies with iterators.
5707   LValue PosLVal = CGF.MakeAddrLValue(
5708       CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
5709   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
5710   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
5711     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
5712         !Dependencies[I].IteratorExpr)
5713       continue;
5714     emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
5715                    DependenciesArray);
5716   }
5717   // Copy final depobj arrays without iterators.
5718   if (HasDepobjDeps) {
5719     for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
5720       if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
5721         continue;
5722       emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
5723                          DependenciesArray);
5724     }
5725   }
5726   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5727       DependenciesArray, CGF.VoidPtrTy);
5728   return std::make_pair(NumOfElements, DependenciesArray);
5729 }
5730 
5731 Address CGOpenMPRuntime::emitDepobjDependClause(
5732     CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
5733     SourceLocation Loc) {
5734   if (Dependencies.DepExprs.empty())
5735     return Address::invalid();
5736   // Process list of dependencies.
5737   ASTContext &C = CGM.getContext();
5738   Address DependenciesArray = Address::invalid();
5739   unsigned NumDependencies = Dependencies.DepExprs.size();
5740   QualType FlagsTy;
5741   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5742   RecordDecl *KmpDependInfoRD =
5743       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5744 
5745   llvm::Value *Size;
5746   // Define type kmp_depend_info[<Dependencies.size()>];
5747   // For depobj reserve one extra element to store the number of elements.
5748   // It is required to handle depobj(x) update(in) construct.
5749   // kmp_depend_info[<Dependencies.size()>] deps;
5750   llvm::Value *NumDepsVal;
5751   CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
5752   if (const auto *IE =
5753           cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
5754     NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
5755     for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
5756       llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
5757       Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
5758       NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
5759     }
5760     Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
5761                                     NumDepsVal);
5762     CharUnits SizeInBytes =
5763         C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
5764     llvm::Value *RecSize = CGM.getSize(SizeInBytes);
5765     Size = CGF.Builder.CreateNUWMul(Size, RecSize);
5766     NumDepsVal =
5767         CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
5768   } else {
5769     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
5770         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
5771         nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
5772     CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
5773     Size = CGM.getSize(Sz.alignTo(Align));
5774     NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
5775   }
5776   // Need to allocate on the dynamic memory.
5777   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5778   // Use default allocator.
5779   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5780   llvm::Value *Args[] = {ThreadID, Size, Allocator};
5781 
5782   llvm::Value *Addr = CGF.EmitRuntimeCall(
5783       createRuntimeFunction(OMPRTL__kmpc_alloc), Args, ".dep.arr.addr");
5784   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5785       Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
5786   DependenciesArray = Address(Addr, Align);
5787   // Write number of elements in the first element of array for depobj.
5788   LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
5789   // deps[i].base_addr = NumDependencies;
5790   LValue BaseAddrLVal = CGF.EmitLValueForField(
5791       Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
5792   CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
5793   llvm::PointerUnion<unsigned *, LValue *> Pos;
5794   unsigned Idx = 1;
5795   LValue PosLVal;
5796   if (Dependencies.IteratorExpr) {
5797     PosLVal = CGF.MakeAddrLValue(
5798         CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
5799         C.getSizeType());
5800     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
5801                           /*IsInit=*/true);
5802     Pos = &PosLVal;
5803   } else {
5804     Pos = &Idx;
5805   }
5806   emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
5807   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5808       CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
5809   return DependenciesArray;
5810 }
5811 
5812 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
5813                                         SourceLocation Loc) {
5814   ASTContext &C = CGM.getContext();
5815   QualType FlagsTy;
5816   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5817   LValue Base = CGF.EmitLoadOfPointerLValue(
5818       DepobjLVal.getAddress(CGF),
5819       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5820   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5821   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5822       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
5823   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5824       Addr.getPointer(),
5825       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5826   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
5827                                                                CGF.VoidPtrTy);
5828   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5829   // Use default allocator.
5830   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5831   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5832 
5833   // _kmpc_free(gtid, addr, nullptr);
5834   (void)CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_free), Args);
5835 }
5836 
5837 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
5838                                        OpenMPDependClauseKind NewDepKind,
5839                                        SourceLocation Loc) {
5840   ASTContext &C = CGM.getContext();
5841   QualType FlagsTy;
5842   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5843   RecordDecl *KmpDependInfoRD =
5844       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5845   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5846   llvm::Value *NumDeps;
5847   LValue Base;
5848   std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
5849 
5850   Address Begin = Base.getAddress(CGF);
5851   // Cast from pointer to array type to pointer to single element.
5852   llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
5853   // The basic structure here is a while-do loop.
5854   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
5855   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
5856   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5857   CGF.EmitBlock(BodyBB);
5858   llvm::PHINode *ElementPHI =
5859       CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
5860   ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
5861   Begin = Address(ElementPHI, Begin.getAlignment());
5862   Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
5863                             Base.getTBAAInfo());
5864   // deps[i].flags = NewDepKind;
5865   RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
5866   LValue FlagsLVal = CGF.EmitLValueForField(
5867       Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5868   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5869                         FlagsLVal);
5870 
5871   // Shift the address forward by one element.
5872   Address ElementNext =
5873       CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
5874   ElementPHI->addIncoming(ElementNext.getPointer(),
5875                           CGF.Builder.GetInsertBlock());
5876   llvm::Value *IsEmpty =
5877       CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
5878   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5879   // Done.
5880   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5881 }
5882 
5883 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
5884                                    const OMPExecutableDirective &D,
5885                                    llvm::Function *TaskFunction,
5886                                    QualType SharedsTy, Address Shareds,
5887                                    const Expr *IfCond,
5888                                    const OMPTaskDataTy &Data) {
5889   if (!CGF.HaveInsertPoint())
5890     return;
5891 
5892   TaskResultTy Result =
5893       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5894   llvm::Value *NewTask = Result.NewTask;
5895   llvm::Function *TaskEntry = Result.TaskEntry;
5896   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
5897   LValue TDBase = Result.TDBase;
5898   const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
5899   // Process list of dependences.
5900   Address DependenciesArray = Address::invalid();
5901   llvm::Value *NumOfElements;
5902   std::tie(NumOfElements, DependenciesArray) =
5903       emitDependClause(CGF, Data.Dependences, Loc);
5904 
5905   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5906   // libcall.
5907   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5908   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5909   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
5910   // list is not empty
5911   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5912   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5913   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5914   llvm::Value *DepTaskArgs[7];
5915   if (!Data.Dependences.empty()) {
5916     DepTaskArgs[0] = UpLoc;
5917     DepTaskArgs[1] = ThreadID;
5918     DepTaskArgs[2] = NewTask;
5919     DepTaskArgs[3] = NumOfElements;
5920     DepTaskArgs[4] = DependenciesArray.getPointer();
5921     DepTaskArgs[5] = CGF.Builder.getInt32(0);
5922     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5923   }
5924   auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
5925                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5926     if (!Data.Tied) {
5927       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5928       LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5929       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5930     }
5931     if (!Data.Dependences.empty()) {
5932       CGF.EmitRuntimeCall(
5933           createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
5934     } else {
5935       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
5936                           TaskArgs);
5937     }
5938     // Check if parent region is untied and build return for untied task;
5939     if (auto *Region =
5940             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5941       Region->emitUntiedSwitch(CGF);
5942   };
5943 
5944   llvm::Value *DepWaitTaskArgs[6];
5945   if (!Data.Dependences.empty()) {
5946     DepWaitTaskArgs[0] = UpLoc;
5947     DepWaitTaskArgs[1] = ThreadID;
5948     DepWaitTaskArgs[2] = NumOfElements;
5949     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5950     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5951     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5952   }
5953   auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
5954                         &Data, &DepWaitTaskArgs,
5955                         Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5956     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5957     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5958     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5959     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5960     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5961     // is specified.
5962     if (!Data.Dependences.empty())
5963       CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
5964                           DepWaitTaskArgs);
5965     // Call proxy_task_entry(gtid, new_task);
5966     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
5967                       Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
5968       Action.Enter(CGF);
5969       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
5970       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
5971                                                           OutlinedFnArgs);
5972     };
5973 
5974     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
5975     // kmp_task_t *new_task);
5976     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
5977     // kmp_task_t *new_task);
5978     RegionCodeGenTy RCG(CodeGen);
5979     CommonActionTy Action(
5980         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
5981         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
5982     RCG.setAction(Action);
5983     RCG(CGF);
5984   };
5985 
5986   if (IfCond) {
5987     emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
5988   } else {
5989     RegionCodeGenTy ThenRCG(ThenCodeGen);
5990     ThenRCG(CGF);
5991   }
5992 }
5993 
5994 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
5995                                        const OMPLoopDirective &D,
5996                                        llvm::Function *TaskFunction,
5997                                        QualType SharedsTy, Address Shareds,
5998                                        const Expr *IfCond,
5999                                        const OMPTaskDataTy &Data) {
6000   if (!CGF.HaveInsertPoint())
6001     return;
6002   TaskResultTy Result =
6003       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
6004   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
6005   // libcall.
6006   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
6007   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
6008   // sched, kmp_uint64 grainsize, void *task_dup);
6009   llvm::Value *ThreadID = getThreadID(CGF, Loc);
6010   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
6011   llvm::Value *IfVal;
6012   if (IfCond) {
6013     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
6014                                       /*isSigned=*/true);
6015   } else {
6016     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
6017   }
6018 
6019   LValue LBLVal = CGF.EmitLValueForField(
6020       Result.TDBase,
6021       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
6022   const auto *LBVar =
6023       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
6024   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
6025                        LBLVal.getQuals(),
6026                        /*IsInitializer=*/true);
6027   LValue UBLVal = CGF.EmitLValueForField(
6028       Result.TDBase,
6029       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
6030   const auto *UBVar =
6031       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
6032   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
6033                        UBLVal.getQuals(),
6034                        /*IsInitializer=*/true);
6035   LValue StLVal = CGF.EmitLValueForField(
6036       Result.TDBase,
6037       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
6038   const auto *StVar =
6039       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
6040   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
6041                        StLVal.getQuals(),
6042                        /*IsInitializer=*/true);
6043   // Store reductions address.
6044   LValue RedLVal = CGF.EmitLValueForField(
6045       Result.TDBase,
6046       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
6047   if (Data.Reductions) {
6048     CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
6049   } else {
6050     CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
6051                                CGF.getContext().VoidPtrTy);
6052   }
6053   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
6054   llvm::Value *TaskArgs[] = {
6055       UpLoc,
6056       ThreadID,
6057       Result.NewTask,
6058       IfVal,
6059       LBLVal.getPointer(CGF),
6060       UBLVal.getPointer(CGF),
6061       CGF.EmitLoadOfScalar(StLVal, Loc),
6062       llvm::ConstantInt::getSigned(
6063           CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
6064       llvm::ConstantInt::getSigned(
6065           CGF.IntTy, Data.Schedule.getPointer()
6066                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
6067                          : NoSchedule),
6068       Data.Schedule.getPointer()
6069           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
6070                                       /*isSigned=*/false)
6071           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
6072       Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6073                              Result.TaskDupFn, CGF.VoidPtrTy)
6074                        : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
6075   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
6076 }
6077 
6078 /// Emit reduction operation for each element of array (required for
6079 /// array sections) LHS op = RHS.
6080 /// \param Type Type of array.
6081 /// \param LHSVar Variable on the left side of the reduction operation
6082 /// (references element of array in original variable).
6083 /// \param RHSVar Variable on the right side of the reduction operation
6084 /// (references element of array in original variable).
6085 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
6086 /// RHSVar.
6087 static void EmitOMPAggregateReduction(
6088     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
6089     const VarDecl *RHSVar,
6090     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
6091                                   const Expr *, const Expr *)> &RedOpGen,
6092     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
6093     const Expr *UpExpr = nullptr) {
6094   // Perform element-by-element initialization.
6095   QualType ElementTy;
6096   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
6097   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
6098 
6099   // Drill down to the base element type on both arrays.
6100   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
6101   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
6102 
6103   llvm::Value *RHSBegin = RHSAddr.getPointer();
6104   llvm::Value *LHSBegin = LHSAddr.getPointer();
6105   // Cast from pointer to array type to pointer to single element.
6106   llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
6107   // The basic structure here is a while-do loop.
6108   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
6109   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
6110   llvm::Value *IsEmpty =
6111       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
6112   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
6113 
6114   // Enter the loop body, making that address the current address.
6115   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
6116   CGF.EmitBlock(BodyBB);
6117 
6118   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
6119 
6120   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
6121       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
6122   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
6123   Address RHSElementCurrent =
6124       Address(RHSElementPHI,
6125               RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
6126 
6127   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
6128       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
6129   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
6130   Address LHSElementCurrent =
6131       Address(LHSElementPHI,
6132               LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
6133 
6134   // Emit copy.
6135   CodeGenFunction::OMPPrivateScope Scope(CGF);
6136   Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
6137   Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
6138   Scope.Privatize();
6139   RedOpGen(CGF, XExpr, EExpr, UpExpr);
6140   Scope.ForceCleanup();
6141 
6142   // Shift the address forward by one element.
6143   llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
6144       LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
6145   llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
6146       RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
6147   // Check whether we've reached the end.
6148   llvm::Value *Done =
6149       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
6150   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
6151   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
6152   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
6153 
6154   // Done.
6155   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
6156 }
6157 
6158 /// Emit reduction combiner. If the combiner is a simple expression emit it as
6159 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
6160 /// UDR combiner function.
6161 static void emitReductionCombiner(CodeGenFunction &CGF,
6162                                   const Expr *ReductionOp) {
6163   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
6164     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
6165       if (const auto *DRE =
6166               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
6167         if (const auto *DRD =
6168                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
6169           std::pair<llvm::Function *, llvm::Function *> Reduction =
6170               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
6171           RValue Func = RValue::get(Reduction.first);
6172           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
6173           CGF.EmitIgnoredExpr(ReductionOp);
6174           return;
6175         }
6176   CGF.EmitIgnoredExpr(ReductionOp);
6177 }
6178 
6179 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
6180     SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
6181     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
6182     ArrayRef<const Expr *> ReductionOps) {
6183   ASTContext &C = CGM.getContext();
6184 
6185   // void reduction_func(void *LHSArg, void *RHSArg);
6186   FunctionArgList Args;
6187   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6188                            ImplicitParamDecl::Other);
6189   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6190                            ImplicitParamDecl::Other);
6191   Args.push_back(&LHSArg);
6192   Args.push_back(&RHSArg);
6193   const auto &CGFI =
6194       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6195   std::string Name = getName({"omp", "reduction", "reduction_func"});
6196   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
6197                                     llvm::GlobalValue::InternalLinkage, Name,
6198                                     &CGM.getModule());
6199   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
6200   Fn->setDoesNotRecurse();
6201   CodeGenFunction CGF(CGM);
6202   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
6203 
6204   // Dst = (void*[n])(LHSArg);
6205   // Src = (void*[n])(RHSArg);
6206   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6207       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
6208       ArgsType), CGF.getPointerAlign());
6209   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6210       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
6211       ArgsType), CGF.getPointerAlign());
6212 
6213   //  ...
6214   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
6215   //  ...
6216   CodeGenFunction::OMPPrivateScope Scope(CGF);
6217   auto IPriv = Privates.begin();
6218   unsigned Idx = 0;
6219   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
6220     const auto *RHSVar =
6221         cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
6222     Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
6223       return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
6224     });
6225     const auto *LHSVar =
6226         cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
6227     Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
6228       return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
6229     });
6230     QualType PrivTy = (*IPriv)->getType();
6231     if (PrivTy->isVariablyModifiedType()) {
6232       // Get array size and emit VLA type.
6233       ++Idx;
6234       Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
6235       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
6236       const VariableArrayType *VLA =
6237           CGF.getContext().getAsVariableArrayType(PrivTy);
6238       const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
6239       CodeGenFunction::OpaqueValueMapping OpaqueMap(
6240           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
6241       CGF.EmitVariablyModifiedType(PrivTy);
6242     }
6243   }
6244   Scope.Privatize();
6245   IPriv = Privates.begin();
6246   auto ILHS = LHSExprs.begin();
6247   auto IRHS = RHSExprs.begin();
6248   for (const Expr *E : ReductionOps) {
6249     if ((*IPriv)->getType()->isArrayType()) {
6250       // Emit reduction for array section.
6251       const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
6252       const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
6253       EmitOMPAggregateReduction(
6254           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
6255           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
6256             emitReductionCombiner(CGF, E);
6257           });
6258     } else {
6259       // Emit reduction for array subscript or single variable.
6260       emitReductionCombiner(CGF, E);
6261     }
6262     ++IPriv;
6263     ++ILHS;
6264     ++IRHS;
6265   }
6266   Scope.ForceCleanup();
6267   CGF.FinishFunction();
6268   return Fn;
6269 }
6270 
6271 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
6272                                                   const Expr *ReductionOp,
6273                                                   const Expr *PrivateRef,
6274                                                   const DeclRefExpr *LHS,
6275                                                   const DeclRefExpr *RHS) {
6276   if (PrivateRef->getType()->isArrayType()) {
6277     // Emit reduction for array section.
6278     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
6279     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
6280     EmitOMPAggregateReduction(
6281         CGF, PrivateRef->getType(), LHSVar, RHSVar,
6282         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
6283           emitReductionCombiner(CGF, ReductionOp);
6284         });
6285   } else {
6286     // Emit reduction for array subscript or single variable.
6287     emitReductionCombiner(CGF, ReductionOp);
6288   }
6289 }
6290 
/// Emits the code that combines each RHSExprs[i] into LHSExprs[i] using
/// ReductionOps[i].
///
/// With Options.SimpleReduction set, only the combiners are emitted inline.
/// Otherwise a list of pointers to the RHS items is built, a reduce_func()
/// helper is emitted, and the result of __kmpc_reduce{_nowait}() is switched
/// on: case 1 emits the plain combiners, case 2 emits atomic updates (or
/// critical regions where a simple atomic update cannot be formed).
/// Nothing is emitted when \p CGF has no insertion point.
/// \param Privates Expressions whose types decide whether an item is handled
/// as an array and whether it needs an extra size slot (variably modified
/// types).
/// \param LHSExprs Left-hand side references; cast to DeclRefExpr when used.
/// \param RHSExprs Right-hand side references; their addresses populate the
/// reduction list.
/// \param ReductionOps Combiner expressions, one per reduction item.
/// \param Options WithNowait selects the *_nowait runtime entry points;
/// SimpleReduction selects the inline-only form.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // The following code is emitted for a reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // If SimpleReduction is true, only the following code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // Inline-only form: no runtime calls, just one combiner per item.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  // Idx is the slot in RedList; it advances an extra step for every variably
  // modified item, so it can run ahead of the item index I.
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size. The element count is converted to a pointer-typed
      // value so that it fits into the void* list.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  // Note that <n> is the number of reduction items, whereas sizeof(RedList)
  // also covers the extra size slots reserved above.
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
                                       : OMPRTL__kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  // Emits the plain (non-atomic) combiners; the Action parameter is not used
  // by the body itself.
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  // No enter call is configured (nullptr callee); only the end-of-reduction
  // runtime function is attached to the region.
  CommonActionTy Action(
      nullptr, llvm::None,
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
                                       : OMPRTL__kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      // Decompose 'X = UpExpr'; for any other shape XExpr stays null and the
      // critical-region fallback below is taken.
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // NOTE(review): the pointer declared in this condition shadows the
      // BinaryOperatorKind 'BO' above for the extent of the if statement.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          // The callback evaluates UpExpr with VD remapped to a temporary
          // that holds the value handed to it.
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // For arrays the critical region is emitted once per element.
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    // This local EndArgs shadows the identically initialized array declared
    // for case 1.
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          createRuntimeFunction(OMPRTL__kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    // With nowait no end-of-reduction call is attached to the atomic case.
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
6594 
6595 /// Generates unique name for artificial threadprivate variables.
6596 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
6597 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
6598                                       const Expr *Ref) {
6599   SmallString<256> Buffer;
6600   llvm::raw_svector_ostream Out(Buffer);
6601   const clang::DeclRefExpr *DE;
6602   const VarDecl *D = ::getBaseDecl(Ref, DE);
6603   if (!D)
6604     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
6605   D = D->getCanonicalDecl();
6606   std::string Name = CGM.getOpenMPRuntime().getName(
6607       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
6608   Out << Prefix << Name << "_"
6609       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
6610   return std::string(Out.str());
6611 }
6612 
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
///
/// \param CGM Module the internal-linkage function is created in.
/// \param Loc Source location used for the implicit parameters, the function
/// start and the load of the dynamic size.
/// \param RCG Reduction codegen helper that emits the item type and its
/// initialization.
/// \param N Index of the reduction item within \p RCG.
/// \returns The generated function.
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // Both parameters are declared as restrict-qualified 'void *'.
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  // %arg: the private copy to initialize.
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  // %orig: the original reduction item.
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Load the pointer to the private copy from the first parameter.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue OrigLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer)
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // The original item is not needed: pass an lvalue based on a null pointer.
    OrigLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  // NOTE(review): the callback always returns false - presumably "no default
  // initialization was emitted by the caller"; confirm against
  // ReductionCodeGen::emitInitialization.
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
6681 
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
///
/// \param CGM Module the internal-linkage function is created in.
/// \param Loc Source location used for the implicit parameters, the function
/// start and the load of the dynamic size.
/// \param RCG Reduction codegen helper used to emit the item type.
/// \param N Index of the reduction item within \p RCG.
/// \param ReductionOp Combiner expression to emit.
/// \param LHS Reference to the LHS helper variable; must be a DeclRefExpr to a
/// VarDecl.
/// \param RHS Reference to the RHS helper variable; must be a DeclRefExpr to a
/// VarDecl.
/// \param PrivateRef Private copy expression forwarded to
/// emitSingleReductionCombiner.
/// \returns The generated function.
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  // %arg0: in/out item, remapped to the LHS variable below.
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  // %arg1: input item, remapped to the RHS variable below.
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
6759 
6760 /// Emits reduction finalizer function:
6761 /// \code
6762 /// void @.red_fini(void* %arg) {
6763 /// %0 = bitcast void* %arg to <type>*
6764 /// <destroy>(<type>* %0)
6765 /// ret void
6766 /// }
6767 /// \endcode
6768 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
6769                                            SourceLocation Loc,
6770                                            ReductionCodeGen &RCG, unsigned N) {
6771   if (!RCG.needCleanups(N))
6772     return nullptr;
6773   ASTContext &C = CGM.getContext();
6774   FunctionArgList Args;
6775   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6776                           ImplicitParamDecl::Other);
6777   Args.emplace_back(&Param);
6778   const auto &FnInfo =
6779       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6780   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6781   std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
6782   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6783                                     Name, &CGM.getModule());
6784   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6785   Fn->setDoesNotRecurse();
6786   CodeGenFunction CGF(CGM);
6787   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6788   Address PrivateAddr = CGF.EmitLoadOfPointer(
6789       CGF.GetAddrOfLocalVar(&Param),
6790       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6791   llvm::Value *Size = nullptr;
6792   // If the size of the reduction item is non-constant, load it from global
6793   // threadprivate variable.
6794   if (RCG.getSizes(N).second) {
6795     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6796         CGF, CGM.getContext().getSizeType(),
6797         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6798     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6799                                 CGM.getContext().getSizeType(), Loc);
6800   }
6801   RCG.emitAggregateType(CGF, N, Size);
6802   // Emit the finalizer body:
6803   // <destroy>(<type>* %0)
6804   RCG.emitCleanups(CGF, N, PrivateAddr);
6805   CGF.FinishFunction(Loc);
6806   return Fn;
6807 }
6808 
// Emits the initialization of task reductions: fills a temporary array of
// kmp_taskred_input_t descriptors, one per entry of Data.ReductionVars, and
// passes it to __kmpc_taskred_modifier_init (when Data.IsReductionWithTaskMod
// is set) or to __kmpc_taskred_init. Returns the result of that runtime call,
// or nullptr if there is no insert point or there are no reduction variables.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_taskred_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_taskred_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    // ElemLVal.reduce_size = <size of the item in chars, as size_t>;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini; (null when the item needs no finalizer)
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = DelayedCreation ? 1 : 0;
    // NOTE(review): 1 is presumably the runtime's delayed/lazy creation flag
    // bit - confirm against the runtime's task reduction flags definition.
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__kmpc_taskred_modifier_init), Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskred_init),
                             Args);
}
6934 
6935 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6936                                             SourceLocation Loc,
6937                                             bool IsWorksharingReduction) {
6938   // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6939   // is_ws, int num, void *data);
6940   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6941   llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6942                                                 CGM.IntTy, /*isSigned=*/true);
6943   llvm::Value *Args[] = {IdentTLoc, GTid,
6944                          llvm::ConstantInt::get(CGM.IntTy,
6945                                                 IsWorksharingReduction ? 1 : 0,
6946                                                 /*isSigned=*/true)};
6947   (void)CGF.EmitRuntimeCall(
6948       createRuntimeFunction(OMPRTL__kmpc_task_reduction_modifier_fini), Args);
6949 }
6950 
6951 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6952                                               SourceLocation Loc,
6953                                               ReductionCodeGen &RCG,
6954                                               unsigned N) {
6955   auto Sizes = RCG.getSizes(N);
6956   // Emit threadprivate global variable if the type is non-constant
6957   // (Sizes.second = nullptr).
6958   if (Sizes.second) {
6959     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6960                                                      /*isSigned=*/false);
6961     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6962         CGF, CGM.getContext().getSizeType(),
6963         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6964     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6965   }
6966 }
6967 
6968 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6969                                               SourceLocation Loc,
6970                                               llvm::Value *ReductionsPtr,
6971                                               LValue SharedLVal) {
6972   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6973   // *d);
6974   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6975                                                    CGM.IntTy,
6976                                                    /*isSigned=*/true),
6977                          ReductionsPtr,
6978                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6979                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6980   return Address(
6981       CGF.EmitRuntimeCall(
6982           createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
6983       SharedLVal.getAlignment());
6984 }
6985 
6986 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6987                                        SourceLocation Loc) {
6988   if (!CGF.HaveInsertPoint())
6989     return;
6990 
6991   llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
6992   if (OMPBuilder) {
6993     OMPBuilder->CreateTaskwait(CGF.Builder);
6994   } else {
6995     // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6996     // global_tid);
6997     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6998     // Ignore return result until untied tasks are supported.
6999     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
7000   }
7001 
7002   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
7003     Region->emitUntiedSwitch(CGF);
7004 }
7005 
7006 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
7007                                            OpenMPDirectiveKind InnerKind,
7008                                            const RegionCodeGenTy &CodeGen,
7009                                            bool HasCancel) {
7010   if (!CGF.HaveInsertPoint())
7011     return;
7012   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
7013   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
7014 }
7015 
namespace {
/// Cancellation kinds passed as the 'cncl_kind' argument of the
/// __kmpc_cancel and __kmpc_cancellationpoint runtime calls.
enum RTCancelKind {
  /// No cancellation kind; not produced for any supported cancel region.
  CancelNoreq = 0,
  /// Cancel region is 'parallel'.
  CancelParallel = 1,
  /// Cancel region is 'for'.
  CancelLoop = 2,
  /// Cancel region is 'sections'.
  CancelSections = 3,
  /// Cancel region is 'taskgroup'.
  CancelTaskgroup = 4
};
} // anonymous namespace
7025 
7026 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
7027   RTCancelKind CancelKind = CancelNoreq;
7028   if (CancelRegion == OMPD_parallel)
7029     CancelKind = CancelParallel;
7030   else if (CancelRegion == OMPD_for)
7031     CancelKind = CancelLoop;
7032   else if (CancelRegion == OMPD_sections)
7033     CancelKind = CancelSections;
7034   else {
7035     assert(CancelRegion == OMPD_taskgroup);
7036     CancelKind = CancelTaskgroup;
7037   }
7038   return CancelKind;
7039 }
7040 
7041 void CGOpenMPRuntime::emitCancellationPointCall(
7042     CodeGenFunction &CGF, SourceLocation Loc,
7043     OpenMPDirectiveKind CancelRegion) {
7044   if (!CGF.HaveInsertPoint())
7045     return;
7046   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
7047   // global_tid, kmp_int32 cncl_kind);
7048   if (auto *OMPRegionInfo =
7049           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
7050     // For 'cancellation point taskgroup', the task region info may not have a
7051     // cancel. This may instead happen in another adjacent task.
7052     if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
7053       llvm::Value *Args[] = {
7054           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
7055           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
7056       // Ignore return result until untied tasks are supported.
7057       llvm::Value *Result = CGF.EmitRuntimeCall(
7058           createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
7059       // if (__kmpc_cancellationpoint()) {
7060       //   exit from construct;
7061       // }
7062       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
7063       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
7064       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
7065       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
7066       CGF.EmitBlock(ExitBB);
7067       // exit from construct;
7068       CodeGenFunction::JumpDest CancelDest =
7069           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
7070       CGF.EmitBranchThroughCleanup(CancelDest);
7071       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
7072     }
7073   }
7074 }
7075 
7076 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
7077                                      const Expr *IfCond,
7078                                      OpenMPDirectiveKind CancelRegion) {
7079   if (!CGF.HaveInsertPoint())
7080     return;
7081   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
7082   // kmp_int32 cncl_kind);
7083   if (auto *OMPRegionInfo =
7084           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
7085     auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
7086                                                         PrePostActionTy &) {
7087       CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
7088       llvm::Value *Args[] = {
7089           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
7090           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
7091       // Ignore return result until untied tasks are supported.
7092       llvm::Value *Result = CGF.EmitRuntimeCall(
7093           RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
7094       // if (__kmpc_cancel()) {
7095       //   exit from construct;
7096       // }
7097       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
7098       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
7099       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
7100       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
7101       CGF.EmitBlock(ExitBB);
7102       // exit from construct;
7103       CodeGenFunction::JumpDest CancelDest =
7104           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
7105       CGF.EmitBranchThroughCleanup(CancelDest);
7106       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
7107     };
7108     if (IfCond) {
7109       emitIfClause(CGF, IfCond, ThenGen,
7110                    [](CodeGenFunction &, PrePostActionTy &) {});
7111     } else {
7112       RegionCodeGenTy ThenRCG(ThenGen);
7113       ThenRCG(CGF);
7114     }
7115   }
7116 }
7117 
7118 namespace {
7119 /// Cleanup action for uses_allocators support.
7120 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
7121   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
7122 
7123 public:
7124   OMPUsesAllocatorsActionTy(
7125       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
7126       : Allocators(Allocators) {}
7127   void Enter(CodeGenFunction &CGF) override {
7128     if (!CGF.HaveInsertPoint())
7129       return;
7130     for (const auto &AllocatorData : Allocators) {
7131       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
7132           CGF, AllocatorData.first, AllocatorData.second);
7133     }
7134   }
7135   void Exit(CodeGenFunction &CGF) override {
7136     if (!CGF.HaveInsertPoint())
7137       return;
7138     for (const auto &AllocatorData : Allocators) {
7139       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
7140                                                         AllocatorData.first);
7141     }
7142   }
7143 };
7144 } // namespace
7145 
7146 void CGOpenMPRuntime::emitTargetOutlinedFunction(
7147     const OMPExecutableDirective &D, StringRef ParentName,
7148     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
7149     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
7150   assert(!ParentName.empty() && "Invalid target region parent name!");
7151   HasEmittedTargetRegion = true;
7152   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
7153   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
7154     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
7155       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
7156       if (!D.AllocatorTraits)
7157         continue;
7158       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
7159     }
7160   }
7161   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
7162   CodeGen.setAction(UsesAllocatorAction);
7163   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
7164                                    IsOffloadEntry, CodeGen);
7165 }
7166 
7167 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
7168                                              const Expr *Allocator,
7169                                              const Expr *AllocatorTraits) {
7170   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
7171   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
7172   // Use default memspace handle.
7173   llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
7174   llvm::Value *NumTraits = llvm::ConstantInt::get(
7175       CGF.IntTy, cast<ConstantArrayType>(
7176                      AllocatorTraits->getType()->getAsArrayTypeUnsafe())
7177                      ->getSize()
7178                      .getLimitedValue());
7179   LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
7180   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
7181       AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
7182   AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
7183                                            AllocatorTraitsLVal.getBaseInfo(),
7184                                            AllocatorTraitsLVal.getTBAAInfo());
7185   llvm::Value *Traits =
7186       CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());
7187 
7188   llvm::Value *AllocatorVal =
7189       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_init_allocator),
7190                           {ThreadId, MemSpaceHandle, NumTraits, Traits});
7191   // Store to allocator.
7192   CGF.EmitVarDecl(*cast<VarDecl>(
7193       cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
7194   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
7195   AllocatorVal =
7196       CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
7197                                Allocator->getType(), Allocator->getExprLoc());
7198   CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
7199 }
7200 
7201 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
7202                                              const Expr *Allocator) {
7203   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
7204   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
7205   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
7206   llvm::Value *AllocatorVal =
7207       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
7208   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
7209                                           CGF.getContext().VoidPtrTy,
7210                                           Allocator->getExprLoc());
7211   (void)CGF.EmitRuntimeCall(
7212       createRuntimeFunction(OMPRTL__kmpc_destroy_allocator),
7213       {ThreadId, AllocatorVal});
7214 }
7215 
7216 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
7217     const OMPExecutableDirective &D, StringRef ParentName,
7218     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
7219     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
7220   // Create a unique name for the entry function using the source location
7221   // information of the current target region. The name will be something like:
7222   //
7223   // __omp_offloading_DD_FFFF_PP_lBB
7224   //
7225   // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
7226   // mangled name of the function that encloses the target region and BB is the
7227   // line number of the target region.
7228 
7229   unsigned DeviceID;
7230   unsigned FileID;
7231   unsigned Line;
7232   getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
7233                            Line);
7234   SmallString<64> EntryFnName;
7235   {
7236     llvm::raw_svector_ostream OS(EntryFnName);
7237     OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
7238        << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
7239   }
7240 
7241   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
7242 
7243   CodeGenFunction CGF(CGM, true);
7244   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
7245   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
7246 
7247   OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
7248 
7249   // If this target outline function is not an offload entry, we don't need to
7250   // register it.
7251   if (!IsOffloadEntry)
7252     return;
7253 
7254   // The target region ID is used by the runtime library to identify the current
7255   // target region, so it only has to be unique and not necessarily point to
7256   // anything. It could be the pointer to the outlined function that implements
7257   // the target region, but we aren't using that so that the compiler doesn't
7258   // need to keep that, and could therefore inline the host function if proven
7259   // worthwhile during optimization. In the other hand, if emitting code for the
7260   // device, the ID has to be the function address so that it can retrieved from
7261   // the offloading entry and launched by the runtime library. We also mark the
7262   // outlined function to have external linkage in case we are emitting code for
7263   // the device, because these functions will be entry points to the device.
7264 
7265   if (CGM.getLangOpts().OpenMPIsDevice) {
7266     OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
7267     OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
7268     OutlinedFn->setDSOLocal(false);
7269   } else {
7270     std::string Name = getName({EntryFnName, "region_id"});
7271     OutlinedFnID = new llvm::GlobalVariable(
7272         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
7273         llvm::GlobalValue::WeakAnyLinkage,
7274         llvm::Constant::getNullValue(CGM.Int8Ty), Name);
7275   }
7276 
7277   // Register the information for the entry associated with this target region.
7278   OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
7279       DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
7280       OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
7281 }
7282 
7283 /// Checks if the expression is constant or does not have non-trivial function
7284 /// calls.
7285 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
7286   // We can skip constant expressions.
7287   // We can skip expressions with trivial calls or simple expressions.
7288   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
7289           !E->hasNonTrivialCall(Ctx)) &&
7290          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
7291 }
7292 
7293 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
7294                                                     const Stmt *Body) {
7295   const Stmt *Child = Body->IgnoreContainers();
7296   while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
7297     Child = nullptr;
7298     for (const Stmt *S : C->body()) {
7299       if (const auto *E = dyn_cast<Expr>(S)) {
7300         if (isTrivial(Ctx, E))
7301           continue;
7302       }
7303       // Some of the statements can be ignored.
7304       if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
7305           isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
7306         continue;
7307       // Analyze declarations.
7308       if (const auto *DS = dyn_cast<DeclStmt>(S)) {
7309         if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
7310               if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
7311                   isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
7312                   isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
7313                   isa<UsingDirectiveDecl>(D) ||
7314                   isa<OMPDeclareReductionDecl>(D) ||
7315                   isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
7316                 return true;
7317               const auto *VD = dyn_cast<VarDecl>(D);
7318               if (!VD)
7319                 return false;
7320               return VD->isConstexpr() ||
7321                      ((VD->getType().isTrivialType(Ctx) ||
7322                        VD->getType()->isReferenceType()) &&
7323                       (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
7324             }))
7325           continue;
7326       }
7327       // Found multiple children - cannot get the one child only.
7328       if (Child)
7329         return nullptr;
7330       Child = S;
7331     }
7332     if (Child)
7333       Child = Child->IgnoreContainers();
7334   }
7335   return Child;
7336 }
7337 
/// Emit the number of teams for a target directive.  Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit a team of size one for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
///
/// \param CGF Function in which the value is emitted; must not be generating
/// device code (asserted).
/// \param D Target-based executable directive (asserted).
/// \returns an i32 value, or nullptr for a plain 'target' whose body does not
/// consist of a single nested OpenMP executable directive.
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': the answer depends on the single directive (if any)
    // closely nested inside the region.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          // The num_teams expression belongs to the nested directive, so it is
          // emitted with the captured-statement info of the target region
          // installed for the duration of the emission.
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        // Nested teams construct without a num_teams clause.
        return Bld.getInt32(0);
      }
      // A nested parallel or simd construct gets a single team.
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      // Any other nested directive.
      return Bld.getInt32(0);
    }
    // No single nested directive was found.
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target-teams directives carry the num_teams clause themselves.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // No teams construct involved: a single team.
    return Bld.getInt32(1);
  // The remaining kinds are listed only to keep the switch exhaustive; they
  // all fall through to the llvm_unreachable below.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
7468 
/// Compute the number of threads implied by the single OpenMP directive (if
/// any) found directly inside the captured statement \p CS.
///
/// - Nested parallel directive: the result combines its 'if' clause (only one
///   with no name modifier or the 'parallel' modifier), its 'num_threads'
///   clause and \p DefaultThreadLimitVal.
/// - Nested simd directive: the constant 1.
/// - Any other nested directive: \p DefaultThreadLimitVal unchanged, which may
///   be nullptr.
/// - No single nested directive: \p DefaultThreadLimitVal if non-null,
///   otherwise the constant 0.
///
/// \param DefaultThreadLimitVal Upper bound supplied by the caller (an i32
/// value), or nullptr if there is none.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        // Clause expressions of the nested directive are emitted with the
        // captured-statement info of CS installed.
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        // Pick the first 'if' clause that applies to the parallel region.
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Condition folds to a constant: if it is false the answer is 1;
            // if it is true the clause has no effect and CondVal stays null.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Emit the variables of the clause's pre-init statement before
            // evaluating the condition.
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  // Variables marked OMPCaptureNoInitAttr only get their
                  // alloca and cleanups; no initialization is emitted.
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        // Same pre-init handling as for the 'if' clause above.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp to the caller-provided limit: unsigned min(limit, num_threads).
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        // No num_threads clause: use the limit if there is one, else 0.
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    // Some other nested directive: hand back the caller's limit as is. Note
    // that this is nullptr when the caller passed no limit.
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}
7560 
7561 /// Emit the number of threads for a target directive.  Inspect the
7562 /// thread_limit clause associated with a teams construct combined or closely
7563 /// nested with the target directive.
7564 ///
7565 /// Emit the num_threads clause for directives such as 'target parallel' that
7566 /// have no associated teams construct.
7567 ///
7568 /// Otherwise, return nullptr.
7569 static llvm::Value *
7570 emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
7571                                  const OMPExecutableDirective &D) {
7572   assert(!CGF.getLangOpts().OpenMPIsDevice &&
7573          "Clauses associated with the teams directive expected to be emitted "
7574          "only for the host!");
7575   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
7576   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
7577          "Expected target-based executable directive.");
7578   CGBuilderTy &Bld = CGF.Builder;
7579   llvm::Value *ThreadLimitVal = nullptr;
7580   llvm::Value *NumThreadsVal = nullptr;
7581   switch (DirectiveKind) {
7582   case OMPD_target: {
7583     const CapturedStmt *CS = D.getInnermostCapturedStmt();
7584     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7585       return NumThreads;
7586     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
7587         CGF.getContext(), CS->getCapturedStmt());
7588     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
7589       if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
7590         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
7591         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
7592         const auto *ThreadLimitClause =
7593             Dir->getSingleClause<OMPThreadLimitClause>();
7594         CodeGenFunction::LexicalScope Scope(
7595             CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
7596         if (const auto *PreInit =
7597                 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
7598           for (const auto *I : PreInit->decls()) {
7599             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
7600               CGF.EmitVarDecl(cast<VarDecl>(*I));
7601             } else {
7602               CodeGenFunction::AutoVarEmission Emission =
7603                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
7604               CGF.EmitAutoVarCleanups(Emission);
7605             }
7606           }
7607         }
7608         llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7609             ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7610         ThreadLimitVal =
7611             Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7612       }
7613       if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
7614           !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
7615         CS = Dir->getInnermostCapturedStmt();
7616         const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
7617             CGF.getContext(), CS->getCapturedStmt());
7618         Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
7619       }
7620       if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
7621           !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
7622         CS = Dir->getInnermostCapturedStmt();
7623         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7624           return NumThreads;
7625       }
7626       if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
7627         return Bld.getInt32(1);
7628     }
7629     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
7630   }
7631   case OMPD_target_teams: {
7632     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7633       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7634       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7635       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7636           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7637       ThreadLimitVal =
7638           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7639     }
7640     const CapturedStmt *CS = D.getInnermostCapturedStmt();
7641     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7642       return NumThreads;
7643     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
7644         CGF.getContext(), CS->getCapturedStmt());
7645     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
7646       if (Dir->getDirectiveKind() == OMPD_distribute) {
7647         CS = Dir->getInnermostCapturedStmt();
7648         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7649           return NumThreads;
7650       }
7651     }
7652     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
7653   }
7654   case OMPD_target_teams_distribute:
7655     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7656       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7657       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7658       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7659           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7660       ThreadLimitVal =
7661           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7662     }
7663     return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
7664   case OMPD_target_parallel:
7665   case OMPD_target_parallel_for:
7666   case OMPD_target_parallel_for_simd:
7667   case OMPD_target_teams_distribute_parallel_for:
7668   case OMPD_target_teams_distribute_parallel_for_simd: {
7669     llvm::Value *CondVal = nullptr;
7670     // Handle if clause. If if clause present, the number of threads is
7671     // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
7672     if (D.hasClausesOfKind<OMPIfClause>()) {
7673       const OMPIfClause *IfClause = nullptr;
7674       for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
7675         if (C->getNameModifier() == OMPD_unknown ||
7676             C->getNameModifier() == OMPD_parallel) {
7677           IfClause = C;
7678           break;
7679         }
7680       }
7681       if (IfClause) {
7682         const Expr *Cond = IfClause->getCondition();
7683         bool Result;
7684         if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
7685           if (!Result)
7686             return Bld.getInt32(1);
7687         } else {
7688           CodeGenFunction::RunCleanupsScope Scope(CGF);
7689           CondVal = CGF.EvaluateExprAsBool(Cond);
7690         }
7691       }
7692     }
7693     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7694       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7695       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7696       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7697           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7698       ThreadLimitVal =
7699           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7700     }
7701     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
7702       CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
7703       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
7704       llvm::Value *NumThreads = CGF.EmitScalarExpr(
7705           NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
7706       NumThreadsVal =
7707           Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
7708       ThreadLimitVal = ThreadLimitVal
7709                            ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
7710                                                                 ThreadLimitVal),
7711                                               NumThreadsVal, ThreadLimitVal)
7712                            : NumThreadsVal;
7713     }
7714     if (!ThreadLimitVal)
7715       ThreadLimitVal = Bld.getInt32(0);
7716     if (CondVal)
7717       return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
7718     return ThreadLimitVal;
7719   }
7720   case OMPD_target_teams_distribute_simd:
7721   case OMPD_target_simd:
7722     return Bld.getInt32(1);
7723   case OMPD_parallel:
7724   case OMPD_for:
7725   case OMPD_parallel_for:
7726   case OMPD_parallel_master:
7727   case OMPD_parallel_sections:
7728   case OMPD_for_simd:
7729   case OMPD_parallel_for_simd:
7730   case OMPD_cancel:
7731   case OMPD_cancellation_point:
7732   case OMPD_ordered:
7733   case OMPD_threadprivate:
7734   case OMPD_allocate:
7735   case OMPD_task:
7736   case OMPD_simd:
7737   case OMPD_sections:
7738   case OMPD_section:
7739   case OMPD_single:
7740   case OMPD_master:
7741   case OMPD_critical:
7742   case OMPD_taskyield:
7743   case OMPD_barrier:
7744   case OMPD_taskwait:
7745   case OMPD_taskgroup:
7746   case OMPD_atomic:
7747   case OMPD_flush:
7748   case OMPD_depobj:
7749   case OMPD_scan:
7750   case OMPD_teams:
7751   case OMPD_target_data:
7752   case OMPD_target_exit_data:
7753   case OMPD_target_enter_data:
7754   case OMPD_distribute:
7755   case OMPD_distribute_simd:
7756   case OMPD_distribute_parallel_for:
7757   case OMPD_distribute_parallel_for_simd:
7758   case OMPD_teams_distribute:
7759   case OMPD_teams_distribute_simd:
7760   case OMPD_teams_distribute_parallel_for:
7761   case OMPD_teams_distribute_parallel_for_simd:
7762   case OMPD_target_update:
7763   case OMPD_declare_simd:
7764   case OMPD_declare_variant:
7765   case OMPD_begin_declare_variant:
7766   case OMPD_end_declare_variant:
7767   case OMPD_declare_target:
7768   case OMPD_end_declare_target:
7769   case OMPD_declare_reduction:
7770   case OMPD_declare_mapper:
7771   case OMPD_taskloop:
7772   case OMPD_taskloop_simd:
7773   case OMPD_master_taskloop:
7774   case OMPD_master_taskloop_simd:
7775   case OMPD_parallel_master_taskloop:
7776   case OMPD_parallel_master_taskloop_simd:
7777   case OMPD_requires:
7778   case OMPD_unknown:
7779     break;
7780   }
7781   llvm_unreachable("Unsupported directive kind.");
7782 }
7783 
7784 namespace {
7785 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7786 
7787 // Utility to handle information from clauses associated with a given
7788 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7789 // It provides a convenient interface to obtain the information and generate
7790 // code for that information.
7791 class MappableExprsHandler {
7792 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading. The flags can be combined with bitwise operators (the enum
  /// is marked as a bitmask enum below).
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// The mapping is implicit.
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// The 16 MSBs of the flags indicate whether the entry is a member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7833 
7834   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7835   static unsigned getFlagMemberOffset() {
7836     unsigned Offset = 0;
7837     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7838          Remain = Remain >> 1)
7839       Offset++;
7840     return Offset;
7841   }
7842 
  /// Class that associates information with a base pointer to be passed to the
  /// runtime library. It pairs the pointer value with an optional declaration
  /// describing the device pointer it refers to.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    /// Implicitly constructible from a plain llvm::Value pointer; the device
    /// pointer declaration is optional and defaults to null.
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    /// Returns the wrapped base pointer.
    llvm::Value *operator*() const { return Ptr; }
    /// Returns the associated device pointer declaration, or null.
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    /// Replaces the associated device pointer declaration.
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };
7859 
7860   using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
7861   using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
7862   using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
7863 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  ///
  /// This struct is the value side of that map; all addresses start out
  /// invalid and both field indices start at 0.
  struct StructRangeInfoTy {
    /// Field index and address of the lowest mapped element.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Field index and address of the highest mapped element.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Presumably the base address of the struct itself -- TODO confirm
    /// against the code that fills it in.
    Address Base = Address::invalid();
  };
7875 
private:
  /// Information recorded for one list of mappable-expression components:
  /// the components themselves, the map type and map modifiers, whether a
  /// device pointer has to be returned for the entry, and whether the mapping
  /// is implicit.
  struct MapInfo {
    /// The expression components being mapped.
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    /// Kind of the map clause; OMPC_MAP_unknown by default.
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    /// Map modifiers. This is a non-owning ArrayRef, so the referenced
    /// storage has to outlive this object.
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    /// True if a device pointer has to be returned for this entry.
    bool ReturnDevicePointer = false;
    /// True if the mapping is implicit.
    bool IsImplicit = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
  };
7894 
  /// If use_device_ptr is used on a pointer which is a struct member and there
  /// is no map information about it, then emission of that entry is deferred
  /// until the whole struct has been processed. This struct records one such
  /// deferred entry.
  struct DeferredDevicePtrEntryTy {
    /// Expression associated with the deferred entry (presumably the
    /// expression that refers to the pointer -- TODO confirm at the use site).
    const Expr *IE = nullptr;
    /// Declaration associated with the deferred entry.
    const ValueDecl *VD = nullptr;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
        : IE(IE), VD(VD) {}
  };
7905 
7906   /// The target directive from where the mappable clauses were extracted. It
7907   /// is either a executable directive or a user-defined mapper directive.
7908   llvm::PointerUnion<const OMPExecutableDirective *,
7909                      const OMPDeclareMapperDecl *>
7910       CurDir;
7911 
7912   /// Function the directive is being generated for.
7913   CodeGenFunction &CGF;
7914 
7915   /// Set of all first private variables in the current directive.
7916   /// bool data is set to true if the variable is implicitly marked as
7917   /// firstprivate, false otherwise.
7918   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
7919 
7920   /// Map between device pointer declarations and their expression components.
7921   /// The key value for declarations in 'this' is null.
7922   llvm::DenseMap<
7923       const ValueDecl *,
7924       SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7925       DevPointersMap;
7926 
7927   llvm::Value *getExprTypeSize(const Expr *E) const {
7928     QualType ExprTy = E->getType().getCanonicalType();
7929 
7930     // Calculate the size for array shaping expression.
7931     if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7932       llvm::Value *Size =
7933           CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7934       for (const Expr *SE : OAE->getDimensions()) {
7935         llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7936         Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7937                                       CGF.getContext().getSizeType(),
7938                                       SE->getExprLoc());
7939         Size = CGF.Builder.CreateNUWMul(Size, Sz);
7940       }
7941       return Size;
7942     }
7943 
7944     // Reference types are ignored for mapping purposes.
7945     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7946       ExprTy = RefTy->getPointeeType().getCanonicalType();
7947 
7948     // Given that an array section is considered a built-in type, we need to
7949     // do the calculation based on the length of the section instead of relying
7950     // on CGF.getTypeSize(E->getType()).
7951     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7952       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7953                             OAE->getBase()->IgnoreParenImpCasts())
7954                             .getCanonicalType();
7955 
7956       // If there is no length associated with the expression and lower bound is
7957       // not specified too, that means we are using the whole length of the
7958       // base.
7959       if (!OAE->getLength() && OAE->getColonLoc().isValid() &&
7960           !OAE->getLowerBound())
7961         return CGF.getTypeSize(BaseTy);
7962 
7963       llvm::Value *ElemSize;
7964       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7965         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7966       } else {
7967         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7968         assert(ATy && "Expecting array type if not a pointer type.");
7969         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7970       }
7971 
7972       // If we don't have a length at this point, that is because we have an
7973       // array section with a single element.
7974       if (!OAE->getLength() && OAE->getColonLoc().isInvalid())
7975         return ElemSize;
7976 
7977       if (const Expr *LenExpr = OAE->getLength()) {
7978         llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7979         LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7980                                              CGF.getContext().getSizeType(),
7981                                              LenExpr->getExprLoc());
7982         return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7983       }
7984       assert(!OAE->getLength() && OAE->getColonLoc().isValid() &&
7985              OAE->getLowerBound() && "expected array_section[lb:].");
7986       // Size = sizetype - lb * elemtype;
7987       llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7988       llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7989       LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7990                                        CGF.getContext().getSizeType(),
7991                                        OAE->getLowerBound()->getExprLoc());
7992       LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7993       llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7994       llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7995       LengthVal = CGF.Builder.CreateSelect(
7996           Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7997       return LengthVal;
7998     }
7999     return CGF.getTypeSize(ExprTy);
8000   }
8001 
8002   /// Return the corresponding bits for a given map clause modifier. Add
8003   /// a flag marking the map as a pointer if requested. Add a flag marking the
8004   /// map as the first one of a series of maps that relate to the same map
8005   /// expression.
8006   OpenMPOffloadMappingFlags getMapTypeBits(
8007       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
8008       bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
8009     OpenMPOffloadMappingFlags Bits =
8010         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
8011     switch (MapType) {
8012     case OMPC_MAP_alloc:
8013     case OMPC_MAP_release:
8014       // alloc and release is the default behavior in the runtime library,  i.e.
8015       // if we don't pass any bits alloc/release that is what the runtime is
8016       // going to do. Therefore, we don't need to signal anything for these two
8017       // type modifiers.
8018       break;
8019     case OMPC_MAP_to:
8020       Bits |= OMP_MAP_TO;
8021       break;
8022     case OMPC_MAP_from:
8023       Bits |= OMP_MAP_FROM;
8024       break;
8025     case OMPC_MAP_tofrom:
8026       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
8027       break;
8028     case OMPC_MAP_delete:
8029       Bits |= OMP_MAP_DELETE;
8030       break;
8031     case OMPC_MAP_unknown:
8032       llvm_unreachable("Unexpected map type!");
8033     }
8034     if (AddPtrFlag)
8035       Bits |= OMP_MAP_PTR_AND_OBJ;
8036     if (AddIsTargetParamFlag)
8037       Bits |= OMP_MAP_TARGET_PARAM;
8038     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
8039         != MapModifiers.end())
8040       Bits |= OMP_MAP_ALWAYS;
8041     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
8042         != MapModifiers.end())
8043       Bits |= OMP_MAP_CLOSE;
8044     return Bits;
8045   }
8046 
8047   /// Return true if the provided expression is a final array section. A
8048   /// final array section, is one whose length can't be proved to be one.
8049   bool isFinalArraySectionExpression(const Expr *E) const {
8050     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
8051 
8052     // It is not an array section and therefore not a unity-size one.
8053     if (!OASE)
8054       return false;
8055 
8056     // An array section with no colon always refer to a single element.
8057     if (OASE->getColonLoc().isInvalid())
8058       return false;
8059 
8060     const Expr *Length = OASE->getLength();
8061 
8062     // If we don't have a length we have to check if the array has size 1
8063     // for this dimension. Also, we should always expect a length if the
8064     // base type is pointer.
8065     if (!Length) {
8066       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
8067                              OASE->getBase()->IgnoreParenImpCasts())
8068                              .getCanonicalType();
8069       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
8070         return ATy->getSize().getSExtValue() != 1;
8071       // If we don't have a constant dimension length, we have to consider
8072       // the current section as having any size, so it is not necessarily
8073       // unitary. If it happen to be unity size, that's user fault.
8074       return true;
8075     }
8076 
8077     // Check if the length evaluates to 1.
8078     Expr::EvalResult Result;
8079     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
8080       return true; // Can have more that size 1.
8081 
8082     llvm::APSInt ConstLength = Result.Val.getInt();
8083     return ConstLength.getSExtValue() != 1;
8084   }
8085 
  /// Generate the base pointers, section pointers, sizes and map type
  /// bits for the provided map type, map modifier, and expression components.
  /// For every map entry produced, one element is appended to each of
  /// \a BasePointers, \a Pointers, \a Sizes and \a Types.
  /// \a IsFirstComponentList should be set to true if the provided set of
  /// components is the first associated with a capture.
  /// \a PartialStruct is updated with the base address and the lowest and
  /// highest mapped fields when a member expression is encountered in
  /// \a Components (or with the whole object when \a OverlappedElements is
  /// not empty).
  /// \a IsImplicit is forwarded to getMapTypeBits().
  /// \a OverlappedElements, if not empty, holds the component lists that
  /// overlap the base element; the base element is then emitted as a series
  /// of entries covering only the non-overlapped parts.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType,
      ArrayRef<OpenMPMapModifierKind> MapModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
      StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
      bool IsImplicit,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = llvm::None) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (*), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    // When the variable is on a declare target link or in a to clause with
    // unified memory, a reference is needed to hold the host/device address
    // of the variable.
    bool RequiresReference = false;

    // Scan the components from the base to the complete expression. The
    // component list is traversed in reverse, so the first component visited
    // is the base of the expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
    const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);

    // Compute the initial base pointer (BP) from the first component.
    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      // Array subscript or array section applied directly to 'this'.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
    } else if (OAShE &&
               isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
      // Array shaping applied directly to 'this': the base is the value of
      // the shaped pointer expression.
      BP = Address(
          CGF.EmitScalarExpr(OAShE->getBase()),
          CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
            RequiresReference = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
          }
        }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());

        // We do not need to generate individual map information for the
        // pointer, it can be associated with the combined storage.
        ++I;
      }
    }

    // Track whether a component of the list should be marked as MEMBER_OF some
    // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
    // in a component list should be marked as MEMBER_OF, all subsequent entries
    // do not belong to the base struct. E.g.
    // struct S2 s;
    // s.ps->ps->ps->f[:]
    //   (1) (2) (3) (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
    // is the pointee of ps(2) which is not member of struct s, so it should not
    // be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a component
    // in the component list which is a member expression. Useful when we have a
    // pointer or a final array section, in which case it is the previous
    // component in the list which tells us whether we have a member expression.
    // E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells us
    // whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    for (; I != CE; ++I) {
      // If the current component is member of a struct (parent struct) mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME)
          ShouldBeMemberOf = true;
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section, is one whose length can't be proved to be one.
      bool IsFinalArraySection =
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      // Note: OASE and OAShE below describe the current component and shadow
      // the variables of the same name declared before the loop, which
      // describe the first component.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      const auto *OAShE =
          dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
      const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
      const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
      bool IsPointer =
          OAShE ||
          (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();
      // A pointer-typed component that is not itself a unary or binary
      // operator expression.
      bool IsNonDerefPointer = IsPointer && !UO && !BO;

      if (Next == CE || IsNonDerefPointer || IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
                isa<UnaryOperator>(Next->getAssociatedExpression()) ||
                isa<BinaryOperator>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        // LB is the address of the section being mapped by this component.
        Address LB = Address::invalid();
        if (OAShE) {
          LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
                       CGF.getContext().getTypeAlignInChars(
                           OAShE->getBase()->getType()));
        } else {
          LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
                   .getAddress(CGF);
        }

        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the object
        // it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointer =
            IsPointer && EncounteredME &&
            (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
             EncounteredME);
        if (!OverlappedElements.empty()) {
          // Handle base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(Next == CE &&
                 "Expected last element for the overlapped elements.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on the
          // device.
          PartialStruct.LowestElem = {0, LB};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          // HB is LB advanced by (size of the object - 1) through a void
          // pointer, i.e. the address of the last byte of the object.
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
                                                              CGF.VoidPtrTy),
              TypeSize.getQuantity() - 1);
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          // Emit data for non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapModifiers, IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false);
          LB = BP;
          llvm::Value *Size = nullptr;
          // Do bitcopy of all non-overlapped structure elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            // Use the first component that has an associated declaration to
            // find where this overlapped element starts; the entry emitted
            // below covers the bytes from LB up to that address.
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (MC.getAssociatedDeclaration()) {
                ComponentLB =
                    CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                        .getAddress(CGF);
                Size = CGF.Builder.CreatePtrDiff(
                    CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
                    CGF.EmitCastToVoidPtr(LB.getPointer()));
                break;
              }
            }
            BasePointers.push_back(BP.getPointer());
            Pointers.push_back(LB.getPointer());
            Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty,
                                                      /*isSigned=*/true));
            Types.push_back(Flags);
            // Continue right after the overlapped element.
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
          }
          // Emit the trailing entry: from the end of the last overlapped
          // element up to one past HB.
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Size = CGF.Builder.CreatePtrDiff(
              CGF.EmitCastToVoidPtr(
                  CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
              CGF.EmitCastToVoidPtr(LB.getPointer()));
          Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          Types.push_back(Flags);
          break;
        }
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        if (!IsMemberPointer) {
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags = getMapTypeBits(
              MapType, MapModifiers, IsImplicit,
              !IsExpressionFirstInfo || RequiresReference,
              IsCaptureFirstInfo && !RequiresReference);

          if (!IsExpressionFirstInfo) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
            // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
            if (IsPointer)
              Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
                         OMP_MAP_DELETE | OMP_MAP_CLOSE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
              // should be later updated with the correct value of MEMBER_OF.
              Flags |= OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          Types.push_back(Flags);
        }

        // If we have encountered a member expression so far, keep track of the
        // mapped member. If the parent is "*this", then the value declaration
        // is nullptr.
        if (EncounteredME) {
          const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this struct
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LB};
            PartialStruct.HighestElem = {FieldIndex, LB};
            PartialStruct.Base = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LB};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            PartialStruct.HighestElem = {FieldIndex, LB};
          }
        }

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
      }
    }
  }
8543 
8544   /// Return the adjusted map modifiers if the declaration a capture refers to
8545   /// appears in a first-private clause. This is expected to be used only with
8546   /// directives that start with 'target'.
8547   MappableExprsHandler::OpenMPOffloadMappingFlags
8548   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8549     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8550 
8551     // A first private variable captured by reference will use only the
8552     // 'private ptr' and 'map to' flag. Return the right flags if the captured
8553     // declaration is known as first-private in this handler.
8554     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8555       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
8556           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
8557         return MappableExprsHandler::OMP_MAP_ALWAYS |
8558                MappableExprsHandler::OMP_MAP_TO;
8559       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8560         return MappableExprsHandler::OMP_MAP_TO |
8561                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8562       return MappableExprsHandler::OMP_MAP_PRIVATE |
8563              MappableExprsHandler::OMP_MAP_TO;
8564     }
8565     return MappableExprsHandler::OMP_MAP_TO |
8566            MappableExprsHandler::OMP_MAP_FROM;
8567   }
8568 
8569   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8570     // Rotate by getFlagMemberOffset() bits.
8571     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8572                                                   << getFlagMemberOffset());
8573   }
8574 
8575   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8576                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8577     // If the entry is PTR_AND_OBJ but has not been marked with the special
8578     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8579     // marked as MEMBER_OF.
8580     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8581         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8582       return;
8583 
8584     // Reset the placeholder value to prepare the flag for the assignment of the
8585     // proper MEMBER_OF value.
8586     Flags &= ~OMP_MAP_MEMBER_OF;
8587     Flags |= MemberOfFlag;
8588   }
8589 
8590   void getPlainLayout(const CXXRecordDecl *RD,
8591                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8592                       bool AsBase) const {
8593     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8594 
8595     llvm::StructType *St =
8596         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8597 
8598     unsigned NumElements = St->getNumElements();
8599     llvm::SmallVector<
8600         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8601         RecordLayout(NumElements);
8602 
8603     // Fill bases.
8604     for (const auto &I : RD->bases()) {
8605       if (I.isVirtual())
8606         continue;
8607       const auto *Base = I.getType()->getAsCXXRecordDecl();
8608       // Ignore empty bases.
8609       if (Base->isEmpty() || CGF.getContext()
8610                                  .getASTRecordLayout(Base)
8611                                  .getNonVirtualSize()
8612                                  .isZero())
8613         continue;
8614 
8615       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8616       RecordLayout[FieldIndex] = Base;
8617     }
8618     // Fill in virtual bases.
8619     for (const auto &I : RD->vbases()) {
8620       const auto *Base = I.getType()->getAsCXXRecordDecl();
8621       // Ignore empty bases.
8622       if (Base->isEmpty())
8623         continue;
8624       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8625       if (RecordLayout[FieldIndex])
8626         continue;
8627       RecordLayout[FieldIndex] = Base;
8628     }
8629     // Fill in all the fields.
8630     assert(!RD->isUnion() && "Unexpected union.");
8631     for (const auto *Field : RD->fields()) {
8632       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8633       // will fill in later.)
8634       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
8635         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8636         RecordLayout[FieldIndex] = Field;
8637       }
8638     }
8639     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8640              &Data : RecordLayout) {
8641       if (Data.isNull())
8642         continue;
8643       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
8644         getPlainLayout(Base, Layout, /*AsBase=*/true);
8645       else
8646         Layout.push_back(Data.get<const FieldDecl *>());
8647     }
8648   }
8649 
8650 public:
8651   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8652       : CurDir(&Dir), CGF(CGF) {
8653     // Extract firstprivate clause information.
8654     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8655       for (const auto *D : C->varlists())
8656         FirstPrivateDecls.try_emplace(
8657             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8658     // Extract implicit firstprivates from uses_allocators clauses.
8659     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8660       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8661         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8662         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8663           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8664                                         /*Implicit=*/true);
8665         else if (const auto *VD = dyn_cast<VarDecl>(
8666                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8667                          ->getDecl()))
8668           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8669       }
8670     }
8671     // Extract device pointer clause information.
8672     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8673       for (auto L : C->component_lists())
8674         DevPointersMap[L.first].push_back(L.second);
8675   }
8676 
8677   /// Constructor for the declare mapper directive.
8678   MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
8679       : CurDir(&Dir), CGF(CGF) {}
8680 
8681   /// Generate code for the combined entry if we have a partially mapped struct
8682   /// and take care of the mapping flags of the arguments corresponding to
8683   /// individual struct members.
8684   void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
8685                          MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
8686                          MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
8687                          const StructRangeInfoTy &PartialStruct) const {
8688     // Base is the base of the struct
8689     BasePointers.push_back(PartialStruct.Base.getPointer());
8690     // Pointer is the address of the lowest element
8691     llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
8692     Pointers.push_back(LB);
8693     // Size is (addr of {highest+1} element) - (addr of lowest element)
8694     llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
8695     llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
8696     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8697     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8698     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
8699     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8700                                                   /*isSigned=*/false);
8701     Sizes.push_back(Size);
8702     // Map type is always TARGET_PARAM
8703     Types.push_back(OMP_MAP_TARGET_PARAM);
8704     // Remove TARGET_PARAM flag from the first element
8705     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
8706 
8707     // All other current entries will be MEMBER_OF the combined entry
8708     // (except for PTR_AND_OBJ entries which do not have a placeholder value
8709     // 0xFFFF in the MEMBER_OF field).
8710     OpenMPOffloadMappingFlags MemberOfFlag =
8711         getMemberOfFlag(BasePointers.size() - 1);
8712     for (auto &M : CurTypes)
8713       setCorrectMemberOfFlag(M, MemberOfFlag);
8714   }
8715 
8716   /// Generate all the base pointers, section pointers, sizes and map
8717   /// types for the extracted mappable expressions. Also, for each item that
8718   /// relates with a device pointer, a pair of the relevant declaration and
8719   /// index where it occurs is appended to the device pointers info array.
8720   void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
8721                        MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
8722                        MapFlagsArrayTy &Types) const {
8723     // We have to process the component lists that relate with the same
8724     // declaration in a single chunk so that we can generate the map flags
8725     // correctly. Therefore, we organize all lists in a map.
8726     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
8727 
8728     // Helper function to fill the information map for the different supported
8729     // clauses.
8730     auto &&InfoGen = [&Info](
8731         const ValueDecl *D,
8732         OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8733         OpenMPMapClauseKind MapType,
8734         ArrayRef<OpenMPMapModifierKind> MapModifiers,
8735         bool ReturnDevicePointer, bool IsImplicit) {
8736       const ValueDecl *VD =
8737           D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
8738       Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
8739                             IsImplicit);
8740     };
8741 
8742     assert(CurDir.is<const OMPExecutableDirective *>() &&
8743            "Expect a executable directive");
8744     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8745     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
8746       for (const auto L : C->component_lists()) {
8747         InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
8748             /*ReturnDevicePointer=*/false, C->isImplicit());
8749       }
8750     for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
8751       for (const auto L : C->component_lists()) {
8752         InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
8753             /*ReturnDevicePointer=*/false, C->isImplicit());
8754       }
8755     for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
8756       for (const auto L : C->component_lists()) {
8757         InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
8758             /*ReturnDevicePointer=*/false, C->isImplicit());
8759       }
8760 
8761     // Look at the use_device_ptr clause information and mark the existing map
8762     // entries as such. If there is no map information for an entry in the
8763     // use_device_ptr list, we create one with map type 'alloc' and zero size
8764     // section. It is the user fault if that was not mapped before. If there is
8765     // no map information and the pointer is a struct member, then we defer the
8766     // emission of that entry until the whole struct has been processed.
8767     llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
8768         DeferredInfo;
8769 
8770     for (const auto *C :
8771          CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
8772       for (const auto L : C->component_lists()) {
8773         assert(!L.second.empty() && "Not expecting empty list of components!");
8774         const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
8775         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8776         const Expr *IE = L.second.back().getAssociatedExpression();
8777         // If the first component is a member expression, we have to look into
8778         // 'this', which maps to null in the map of map information. Otherwise
8779         // look directly for the information.
8780         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8781 
8782         // We potentially have map information for this declaration already.
8783         // Look for the first set of components that refer to it.
8784         if (It != Info.end()) {
8785           auto CI = std::find_if(
8786               It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
8787                 return MI.Components.back().getAssociatedDeclaration() == VD;
8788               });
8789           // If we found a map entry, signal that the pointer has to be returned
8790           // and move on to the next declaration.
8791           if (CI != It->second.end()) {
8792             CI->ReturnDevicePointer = true;
8793             continue;
8794           }
8795         }
8796 
8797         // We didn't find any match in our map information - generate a zero
8798         // size array section - if the pointer is a struct member we defer this
8799         // action until the whole struct has been processed.
8800         if (isa<MemberExpr>(IE)) {
8801           // Insert the pointer into Info to be processed by
8802           // generateInfoForComponentList. Because it is a member pointer
8803           // without a pointee, no entry will be generated for it, therefore
8804           // we need to generate one after the whole struct has been processed.
8805           // Nonetheless, generateInfoForComponentList must be called to take
8806           // the pointer into account for the calculation of the range of the
8807           // partial struct.
8808           InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
8809                   /*ReturnDevicePointer=*/false, C->isImplicit());
8810           DeferredInfo[nullptr].emplace_back(IE, VD);
8811         } else {
8812           llvm::Value *Ptr =
8813               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8814           BasePointers.emplace_back(Ptr, VD);
8815           Pointers.push_back(Ptr);
8816           Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8817           Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
8818         }
8819       }
8820     }
8821 
8822     for (const auto &M : Info) {
8823       // We need to know when we generate information for the first component
8824       // associated with a capture, because the mapping flags depend on it.
8825       bool IsFirstComponentList = true;
8826 
8827       // Temporary versions of arrays
8828       MapBaseValuesArrayTy CurBasePointers;
8829       MapValuesArrayTy CurPointers;
8830       MapValuesArrayTy CurSizes;
8831       MapFlagsArrayTy CurTypes;
8832       StructRangeInfoTy PartialStruct;
8833 
8834       for (const MapInfo &L : M.second) {
8835         assert(!L.Components.empty() &&
8836                "Not expecting declaration with no component lists.");
8837 
8838         // Remember the current base pointer index.
8839         unsigned CurrentBasePointersIdx = CurBasePointers.size();
8840         generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
8841                                      CurBasePointers, CurPointers, CurSizes,
8842                                      CurTypes, PartialStruct,
8843                                      IsFirstComponentList, L.IsImplicit);
8844 
8845         // If this entry relates with a device pointer, set the relevant
8846         // declaration and add the 'return pointer' flag.
8847         if (L.ReturnDevicePointer) {
8848           assert(CurBasePointers.size() > CurrentBasePointersIdx &&
8849                  "Unexpected number of mapped base pointers.");
8850 
8851           const ValueDecl *RelevantVD =
8852               L.Components.back().getAssociatedDeclaration();
8853           assert(RelevantVD &&
8854                  "No relevant declaration related with device pointer??");
8855 
8856           CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
8857           CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8858         }
8859         IsFirstComponentList = false;
8860       }
8861 
8862       // Append any pending zero-length pointers which are struct members and
8863       // used with use_device_ptr.
8864       auto CI = DeferredInfo.find(M.first);
8865       if (CI != DeferredInfo.end()) {
8866         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8867           llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8868           llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
8869               this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
8870           CurBasePointers.emplace_back(BasePtr, L.VD);
8871           CurPointers.push_back(Ptr);
8872           CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
8873           // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
8874           // value MEMBER_OF=FFFF so that the entry is later updated with the
8875           // correct value of MEMBER_OF.
8876           CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8877                              OMP_MAP_MEMBER_OF);
8878         }
8879       }
8880 
8881       // If there is an entry in PartialStruct it means we have a struct with
8882       // individual members mapped. Emit an extra combined entry.
8883       if (PartialStruct.Base.isValid())
8884         emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8885                           PartialStruct);
8886 
8887       // We need to append the results of this capture to what we already have.
8888       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8889       Pointers.append(CurPointers.begin(), CurPointers.end());
8890       Sizes.append(CurSizes.begin(), CurSizes.end());
8891       Types.append(CurTypes.begin(), CurTypes.end());
8892     }
8893   }
8894 
8895   /// Generate all the base pointers, section pointers, sizes and map types for
8896   /// the extracted map clauses of user-defined mapper.
8897   void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers,
8898                                 MapValuesArrayTy &Pointers,
8899                                 MapValuesArrayTy &Sizes,
8900                                 MapFlagsArrayTy &Types) const {
8901     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8902            "Expect a declare mapper directive");
8903     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8904     // We have to process the component lists that relate with the same
8905     // declaration in a single chunk so that we can generate the map flags
8906     // correctly. Therefore, we organize all lists in a map.
8907     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
8908 
8909     // Helper function to fill the information map for the different supported
8910     // clauses.
8911     auto &&InfoGen = [&Info](
8912         const ValueDecl *D,
8913         OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8914         OpenMPMapClauseKind MapType,
8915         ArrayRef<OpenMPMapModifierKind> MapModifiers,
8916         bool ReturnDevicePointer, bool IsImplicit) {
8917       const ValueDecl *VD =
8918           D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
8919       Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
8920                             IsImplicit);
8921     };
8922 
8923     for (const auto *C : CurMapperDir->clauselists()) {
8924       const auto *MC = cast<OMPMapClause>(C);
8925       for (const auto L : MC->component_lists()) {
8926         InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(),
8927                 /*ReturnDevicePointer=*/false, MC->isImplicit());
8928       }
8929     }
8930 
8931     for (const auto &M : Info) {
8932       // We need to know when we generate information for the first component
8933       // associated with a capture, because the mapping flags depend on it.
8934       bool IsFirstComponentList = true;
8935 
8936       // Temporary versions of arrays
8937       MapBaseValuesArrayTy CurBasePointers;
8938       MapValuesArrayTy CurPointers;
8939       MapValuesArrayTy CurSizes;
8940       MapFlagsArrayTy CurTypes;
8941       StructRangeInfoTy PartialStruct;
8942 
8943       for (const MapInfo &L : M.second) {
8944         assert(!L.Components.empty() &&
8945                "Not expecting declaration with no component lists.");
8946         generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
8947                                      CurBasePointers, CurPointers, CurSizes,
8948                                      CurTypes, PartialStruct,
8949                                      IsFirstComponentList, L.IsImplicit);
8950         IsFirstComponentList = false;
8951       }
8952 
8953       // If there is an entry in PartialStruct it means we have a struct with
8954       // individual members mapped. Emit an extra combined entry.
8955       if (PartialStruct.Base.isValid())
8956         emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8957                           PartialStruct);
8958 
8959       // We need to append the results of this capture to what we already have.
8960       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8961       Pointers.append(CurPointers.begin(), CurPointers.end());
8962       Sizes.append(CurSizes.begin(), CurSizes.end());
8963       Types.append(CurTypes.begin(), CurTypes.end());
8964     }
8965   }
8966 
8967   /// Emit capture info for lambdas for variables captured by reference.
8968   void generateInfoForLambdaCaptures(
8969       const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
8970       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
8971       MapFlagsArrayTy &Types,
8972       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8973     const auto *RD = VD->getType()
8974                          .getCanonicalType()
8975                          .getNonReferenceType()
8976                          ->getAsCXXRecordDecl();
8977     if (!RD || !RD->isLambda())
8978       return;
8979     Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
8980     LValue VDLVal = CGF.MakeAddrLValue(
8981         VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
8982     llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
8983     FieldDecl *ThisCapture = nullptr;
8984     RD->getCaptureFields(Captures, ThisCapture);
8985     if (ThisCapture) {
8986       LValue ThisLVal =
8987           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8988       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8989       LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8990                                  VDLVal.getPointer(CGF));
8991       BasePointers.push_back(ThisLVal.getPointer(CGF));
8992       Pointers.push_back(ThisLValVal.getPointer(CGF));
8993       Sizes.push_back(
8994           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8995                                     CGF.Int64Ty, /*isSigned=*/true));
8996       Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8997                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8998     }
8999     for (const LambdaCapture &LC : RD->captures()) {
9000       if (!LC.capturesVariable())
9001         continue;
9002       const VarDecl *VD = LC.getCapturedVar();
9003       if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
9004         continue;
9005       auto It = Captures.find(VD);
9006       assert(It != Captures.end() && "Found lambda capture without field.");
9007       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
9008       if (LC.getCaptureKind() == LCK_ByRef) {
9009         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
9010         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9011                                    VDLVal.getPointer(CGF));
9012         BasePointers.push_back(VarLVal.getPointer(CGF));
9013         Pointers.push_back(VarLValVal.getPointer(CGF));
9014         Sizes.push_back(CGF.Builder.CreateIntCast(
9015             CGF.getTypeSize(
9016                 VD->getType().getCanonicalType().getNonReferenceType()),
9017             CGF.Int64Ty, /*isSigned=*/true));
9018       } else {
9019         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
9020         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9021                                    VDLVal.getPointer(CGF));
9022         BasePointers.push_back(VarLVal.getPointer(CGF));
9023         Pointers.push_back(VarRVal.getScalarVal());
9024         Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
9025       }
9026       Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9027                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
9028     }
9029   }
9030 
9031   /// Set correct indices for lambdas captures.
9032   void adjustMemberOfForLambdaCaptures(
9033       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
9034       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
9035       MapFlagsArrayTy &Types) const {
9036     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
9037       // Set correct member_of idx for all implicit lambda captures.
9038       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9039                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
9040         continue;
9041       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
9042       assert(BasePtr && "Unable to find base lambda address.");
9043       int TgtIdx = -1;
9044       for (unsigned J = I; J > 0; --J) {
9045         unsigned Idx = J - 1;
9046         if (Pointers[Idx] != BasePtr)
9047           continue;
9048         TgtIdx = Idx;
9049         break;
9050       }
9051       assert(TgtIdx != -1 && "Unable to find parent lambda.");
9052       // All other current entries will be MEMBER_OF the combined entry
9053       // (except for PTR_AND_OBJ entries which do not have a placeholder value
9054       // 0xFFFF in the MEMBER_OF field).
9055       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
9056       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
9057     }
9058   }
9059 
9060   /// Generate the base pointers, section pointers, sizes and map types
9061   /// associated to a given capture.
9062   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
9063                               llvm::Value *Arg,
9064                               MapBaseValuesArrayTy &BasePointers,
9065                               MapValuesArrayTy &Pointers,
9066                               MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
9067                               StructRangeInfoTy &PartialStruct) const {
9068     assert(!Cap->capturesVariableArrayType() &&
9069            "Not expecting to generate map info for a variable array type!");
9070 
9071     // We need to know when we generating information for the first component
9072     const ValueDecl *VD = Cap->capturesThis()
9073                               ? nullptr
9074                               : Cap->getCapturedVar()->getCanonicalDecl();
9075 
9076     // If this declaration appears in a is_device_ptr clause we just have to
9077     // pass the pointer by value. If it is a reference to a declaration, we just
9078     // pass its value.
9079     if (DevPointersMap.count(VD)) {
9080       BasePointers.emplace_back(Arg, VD);
9081       Pointers.push_back(Arg);
9082       Sizes.push_back(
9083           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
9084                                     CGF.Int64Ty, /*isSigned=*/true));
9085       Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
9086       return;
9087     }
9088 
9089     using MapData =
9090         std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
9091                    OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
9092     SmallVector<MapData, 4> DeclComponentLists;
9093     assert(CurDir.is<const OMPExecutableDirective *>() &&
9094            "Expect a executable directive");
9095     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9096     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
9097       for (const auto L : C->decl_component_lists(VD)) {
9098         assert(L.first == VD &&
9099                "We got information for the wrong declaration??");
9100         assert(!L.second.empty() &&
9101                "Not expecting declaration with no component lists.");
9102         DeclComponentLists.emplace_back(L.second, C->getMapType(),
9103                                         C->getMapTypeModifiers(),
9104                                         C->isImplicit());
9105       }
9106     }
9107 
9108     // Find overlapping elements (including the offset from the base element).
9109     llvm::SmallDenseMap<
9110         const MapData *,
9111         llvm::SmallVector<
9112             OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
9113         4>
9114         OverlappedData;
9115     size_t Count = 0;
9116     for (const MapData &L : DeclComponentLists) {
9117       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9118       OpenMPMapClauseKind MapType;
9119       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9120       bool IsImplicit;
9121       std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
9122       ++Count;
9123       for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
9124         OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
9125         std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
9126         auto CI = Components.rbegin();
9127         auto CE = Components.rend();
9128         auto SI = Components1.rbegin();
9129         auto SE = Components1.rend();
9130         for (; CI != CE && SI != SE; ++CI, ++SI) {
9131           if (CI->getAssociatedExpression()->getStmtClass() !=
9132               SI->getAssociatedExpression()->getStmtClass())
9133             break;
9134           // Are we dealing with different variables/fields?
9135           if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
9136             break;
9137         }
9138         // Found overlapping if, at least for one component, reached the head of
9139         // the components list.
9140         if (CI == CE || SI == SE) {
9141           assert((CI != CE || SI != SE) &&
9142                  "Unexpected full match of the mapping components.");
9143           const MapData &BaseData = CI == CE ? L : L1;
9144           OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
9145               SI == SE ? Components : Components1;
9146           auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
9147           OverlappedElements.getSecond().push_back(SubData);
9148         }
9149       }
9150     }
9151     // Sort the overlapped elements for each item.
9152     llvm::SmallVector<const FieldDecl *, 4> Layout;
9153     if (!OverlappedData.empty()) {
9154       if (const auto *CRD =
9155               VD->getType().getCanonicalType()->getAsCXXRecordDecl())
9156         getPlainLayout(CRD, Layout, /*AsBase=*/false);
9157       else {
9158         const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
9159         Layout.append(RD->field_begin(), RD->field_end());
9160       }
9161     }
9162     for (auto &Pair : OverlappedData) {
9163       llvm::sort(
9164           Pair.getSecond(),
9165           [&Layout](
9166               OMPClauseMappableExprCommon::MappableExprComponentListRef First,
9167               OMPClauseMappableExprCommon::MappableExprComponentListRef
9168                   Second) {
9169             auto CI = First.rbegin();
9170             auto CE = First.rend();
9171             auto SI = Second.rbegin();
9172             auto SE = Second.rend();
9173             for (; CI != CE && SI != SE; ++CI, ++SI) {
9174               if (CI->getAssociatedExpression()->getStmtClass() !=
9175                   SI->getAssociatedExpression()->getStmtClass())
9176                 break;
9177               // Are we dealing with different variables/fields?
9178               if (CI->getAssociatedDeclaration() !=
9179                   SI->getAssociatedDeclaration())
9180                 break;
9181             }
9182 
9183             // Lists contain the same elements.
9184             if (CI == CE && SI == SE)
9185               return false;
9186 
9187             // List with less elements is less than list with more elements.
9188             if (CI == CE || SI == SE)
9189               return CI == CE;
9190 
9191             const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
9192             const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
9193             if (FD1->getParent() == FD2->getParent())
9194               return FD1->getFieldIndex() < FD2->getFieldIndex();
9195             const auto It =
9196                 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
9197                   return FD == FD1 || FD == FD2;
9198                 });
9199             return *It == FD1;
9200           });
9201     }
9202 
9203     // Associated with a capture, because the mapping flags depend on it.
9204     // Go through all of the elements with the overlapped elements.
9205     for (const auto &Pair : OverlappedData) {
9206       const MapData &L = *Pair.getFirst();
9207       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9208       OpenMPMapClauseKind MapType;
9209       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9210       bool IsImplicit;
9211       std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
9212       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
9213           OverlappedComponents = Pair.getSecond();
9214       bool IsFirstComponentList = true;
9215       generateInfoForComponentList(MapType, MapModifiers, Components,
9216                                    BasePointers, Pointers, Sizes, Types,
9217                                    PartialStruct, IsFirstComponentList,
9218                                    IsImplicit, OverlappedComponents);
9219     }
9220     // Go through other elements without overlapped elements.
9221     bool IsFirstComponentList = OverlappedData.empty();
9222     for (const MapData &L : DeclComponentLists) {
9223       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9224       OpenMPMapClauseKind MapType;
9225       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9226       bool IsImplicit;
9227       std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
9228       auto It = OverlappedData.find(&L);
9229       if (It == OverlappedData.end())
9230         generateInfoForComponentList(MapType, MapModifiers, Components,
9231                                      BasePointers, Pointers, Sizes, Types,
9232                                      PartialStruct, IsFirstComponentList,
9233                                      IsImplicit);
9234       IsFirstComponentList = false;
9235     }
9236   }
9237 
9238   /// Generate the base pointers, section pointers, sizes and map types
9239   /// associated with the declare target link variables.
9240   void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
9241                                         MapValuesArrayTy &Pointers,
9242                                         MapValuesArrayTy &Sizes,
9243                                         MapFlagsArrayTy &Types) const {
9244     assert(CurDir.is<const OMPExecutableDirective *>() &&
9245            "Expect a executable directive");
9246     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9247     // Map other list items in the map clause which are not captured variables
9248     // but "declare target link" global variables.
9249     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
9250       for (const auto L : C->component_lists()) {
9251         if (!L.first)
9252           continue;
9253         const auto *VD = dyn_cast<VarDecl>(L.first);
9254         if (!VD)
9255           continue;
9256         llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9257             OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9258         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
9259             !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
9260           continue;
9261         StructRangeInfoTy PartialStruct;
9262         generateInfoForComponentList(
9263             C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
9264             Pointers, Sizes, Types, PartialStruct,
9265             /*IsFirstComponentList=*/true, C->isImplicit());
9266         assert(!PartialStruct.Base.isValid() &&
9267                "No partial structs for declare target link expected.");
9268       }
9269     }
9270   }
9271 
9272   /// Generate the default map information for a given capture \a CI,
9273   /// record field declaration \a RI and captured value \a CV.
9274   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
9275                               const FieldDecl &RI, llvm::Value *CV,
9276                               MapBaseValuesArrayTy &CurBasePointers,
9277                               MapValuesArrayTy &CurPointers,
9278                               MapValuesArrayTy &CurSizes,
9279                               MapFlagsArrayTy &CurMapTypes) const {
9280     bool IsImplicit = true;
9281     // Do the default mapping.
9282     if (CI.capturesThis()) {
9283       CurBasePointers.push_back(CV);
9284       CurPointers.push_back(CV);
9285       const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
9286       CurSizes.push_back(
9287           CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
9288                                     CGF.Int64Ty, /*isSigned=*/true));
9289       // Default map type.
9290       CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
9291     } else if (CI.capturesVariableByCopy()) {
9292       CurBasePointers.push_back(CV);
9293       CurPointers.push_back(CV);
9294       if (!RI.getType()->isAnyPointerType()) {
9295         // We have to signal to the runtime captures passed by value that are
9296         // not pointers.
9297         CurMapTypes.push_back(OMP_MAP_LITERAL);
9298         CurSizes.push_back(CGF.Builder.CreateIntCast(
9299             CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
9300       } else {
9301         // Pointers are implicitly mapped with a zero size and no flags
9302         // (other than first map that is added for all implicit maps).
9303         CurMapTypes.push_back(OMP_MAP_NONE);
9304         CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
9305       }
9306       const VarDecl *VD = CI.getCapturedVar();
9307       auto I = FirstPrivateDecls.find(VD);
9308       if (I != FirstPrivateDecls.end())
9309         IsImplicit = I->getSecond();
9310     } else {
9311       assert(CI.capturesVariable() && "Expected captured reference.");
9312       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
9313       QualType ElementType = PtrTy->getPointeeType();
9314       CurSizes.push_back(CGF.Builder.CreateIntCast(
9315           CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
9316       // The default map type for a scalar/complex type is 'to' because by
9317       // default the value doesn't have to be retrieved. For an aggregate
9318       // type, the default is 'tofrom'.
9319       CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
9320       const VarDecl *VD = CI.getCapturedVar();
9321       auto I = FirstPrivateDecls.find(VD);
9322       if (I != FirstPrivateDecls.end() &&
9323           VD->getType().isConstant(CGF.getContext())) {
9324         llvm::Constant *Addr =
9325             CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
9326         // Copy the value of the original variable to the new global copy.
9327         CGF.Builder.CreateMemCpy(
9328             CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
9329             Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
9330             CurSizes.back(), /*IsVolatile=*/false);
9331         // Use new global variable as the base pointers.
9332         CurBasePointers.push_back(Addr);
9333         CurPointers.push_back(Addr);
9334       } else {
9335         CurBasePointers.push_back(CV);
9336         if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
9337           Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
9338               CV, ElementType, CGF.getContext().getDeclAlign(VD),
9339               AlignmentSource::Decl));
9340           CurPointers.push_back(PtrAddr.getPointer());
9341         } else {
9342           CurPointers.push_back(CV);
9343         }
9344       }
9345       if (I != FirstPrivateDecls.end())
9346         IsImplicit = I->getSecond();
9347     }
9348     // Every default map produces a single argument which is a target parameter.
9349     CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;
9350 
9351     // Add flag stating this is an implicit map.
9352     if (IsImplicit)
9353       CurMapTypes.back() |= OMP_MAP_IMPLICIT;
9354   }
9355 };
9356 } // anonymous namespace
9357 
9358 /// Emit the arrays used to pass the captures and map information to the
9359 /// offloading runtime library. If there is no map or capture information,
9360 /// return nullptr by reference.
9361 static void
9362 emitOffloadingArrays(CodeGenFunction &CGF,
9363                      MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
9364                      MappableExprsHandler::MapValuesArrayTy &Pointers,
9365                      MappableExprsHandler::MapValuesArrayTy &Sizes,
9366                      MappableExprsHandler::MapFlagsArrayTy &MapTypes,
9367                      CGOpenMPRuntime::TargetDataInfo &Info) {
9368   CodeGenModule &CGM = CGF.CGM;
9369   ASTContext &Ctx = CGF.getContext();
9370 
9371   // Reset the array information.
9372   Info.clearArrayInfo();
9373   Info.NumberOfPtrs = BasePointers.size();
9374 
9375   if (Info.NumberOfPtrs) {
9376     // Detect if we have any capture size requiring runtime evaluation of the
9377     // size so that a constant array could be eventually used.
9378     bool hasRuntimeEvaluationCaptureSize = false;
9379     for (llvm::Value *S : Sizes)
9380       if (!isa<llvm::Constant>(S)) {
9381         hasRuntimeEvaluationCaptureSize = true;
9382         break;
9383       }
9384 
9385     llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
9386     QualType PointerArrayType = Ctx.getConstantArrayType(
9387         Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
9388         /*IndexTypeQuals=*/0);
9389 
9390     Info.BasePointersArray =
9391         CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
9392     Info.PointersArray =
9393         CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
9394 
9395     // If we don't have any VLA types or other types that require runtime
9396     // evaluation, we can use a constant array for the map sizes, otherwise we
9397     // need to fill up the arrays as we do for the pointers.
9398     QualType Int64Ty =
9399         Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
9400     if (hasRuntimeEvaluationCaptureSize) {
9401       QualType SizeArrayType = Ctx.getConstantArrayType(
9402           Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
9403           /*IndexTypeQuals=*/0);
9404       Info.SizesArray =
9405           CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
9406     } else {
9407       // We expect all the sizes to be constant, so we collect them to create
9408       // a constant array.
9409       SmallVector<llvm::Constant *, 16> ConstSizes;
9410       for (llvm::Value *S : Sizes)
9411         ConstSizes.push_back(cast<llvm::Constant>(S));
9412 
9413       auto *SizesArrayInit = llvm::ConstantArray::get(
9414           llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
9415       std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
9416       auto *SizesArrayGbl = new llvm::GlobalVariable(
9417           CGM.getModule(), SizesArrayInit->getType(),
9418           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
9419           SizesArrayInit, Name);
9420       SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
9421       Info.SizesArray = SizesArrayGbl;
9422     }
9423 
9424     // The map types are always constant so we don't need to generate code to
9425     // fill arrays. Instead, we create an array constant.
9426     SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
9427     llvm::copy(MapTypes, Mapping.begin());
9428     llvm::Constant *MapTypesArrayInit =
9429         llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
9430     std::string MaptypesName =
9431         CGM.getOpenMPRuntime().getName({"offload_maptypes"});
9432     auto *MapTypesArrayGbl = new llvm::GlobalVariable(
9433         CGM.getModule(), MapTypesArrayInit->getType(),
9434         /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
9435         MapTypesArrayInit, MaptypesName);
9436     MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
9437     Info.MapTypesArray = MapTypesArrayGbl;
9438 
9439     for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
9440       llvm::Value *BPVal = *BasePointers[I];
9441       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
9442           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9443           Info.BasePointersArray, 0, I);
9444       BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9445           BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
9446       Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9447       CGF.Builder.CreateStore(BPVal, BPAddr);
9448 
9449       if (Info.requiresDevicePointerInfo())
9450         if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
9451           Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
9452 
9453       llvm::Value *PVal = Pointers[I];
9454       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9455           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9456           Info.PointersArray, 0, I);
9457       P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9458           P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
9459       Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9460       CGF.Builder.CreateStore(PVal, PAddr);
9461 
9462       if (hasRuntimeEvaluationCaptureSize) {
9463         llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
9464             llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9465             Info.SizesArray,
9466             /*Idx0=*/0,
9467             /*Idx1=*/I);
9468         Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
9469         CGF.Builder.CreateStore(
9470             CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true),
9471             SAddr);
9472       }
9473     }
9474   }
9475 }
9476 
9477 /// Emit the arguments to be passed to the runtime library based on the
9478 /// arrays of pointers, sizes and map types.
9479 static void emitOffloadingArraysArgument(
9480     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
9481     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
9482     llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
9483   CodeGenModule &CGM = CGF.CGM;
9484   if (Info.NumberOfPtrs) {
9485     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9486         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9487         Info.BasePointersArray,
9488         /*Idx0=*/0, /*Idx1=*/0);
9489     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9490         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9491         Info.PointersArray,
9492         /*Idx0=*/0,
9493         /*Idx1=*/0);
9494     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9495         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
9496         /*Idx0=*/0, /*Idx1=*/0);
9497     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9498         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9499         Info.MapTypesArray,
9500         /*Idx0=*/0,
9501         /*Idx1=*/0);
9502   } else {
9503     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9504     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9505     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9506     MapTypesArrayArg =
9507         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9508   }
9509 }
9510 
9511 /// Check for inner distribute directive.
9512 static const OMPExecutableDirective *
9513 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
9514   const auto *CS = D.getInnermostCapturedStmt();
9515   const auto *Body =
9516       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
9517   const Stmt *ChildStmt =
9518       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9519 
9520   if (const auto *NestedDir =
9521           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9522     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
9523     switch (D.getDirectiveKind()) {
9524     case OMPD_target:
9525       if (isOpenMPDistributeDirective(DKind))
9526         return NestedDir;
9527       if (DKind == OMPD_teams) {
9528         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
9529             /*IgnoreCaptured=*/true);
9530         if (!Body)
9531           return nullptr;
9532         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9533         if (const auto *NND =
9534                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9535           DKind = NND->getDirectiveKind();
9536           if (isOpenMPDistributeDirective(DKind))
9537             return NND;
9538         }
9539       }
9540       return nullptr;
9541     case OMPD_target_teams:
9542       if (isOpenMPDistributeDirective(DKind))
9543         return NestedDir;
9544       return nullptr;
9545     case OMPD_target_parallel:
9546     case OMPD_target_simd:
9547     case OMPD_target_parallel_for:
9548     case OMPD_target_parallel_for_simd:
9549       return nullptr;
9550     case OMPD_target_teams_distribute:
9551     case OMPD_target_teams_distribute_simd:
9552     case OMPD_target_teams_distribute_parallel_for:
9553     case OMPD_target_teams_distribute_parallel_for_simd:
9554     case OMPD_parallel:
9555     case OMPD_for:
9556     case OMPD_parallel_for:
9557     case OMPD_parallel_master:
9558     case OMPD_parallel_sections:
9559     case OMPD_for_simd:
9560     case OMPD_parallel_for_simd:
9561     case OMPD_cancel:
9562     case OMPD_cancellation_point:
9563     case OMPD_ordered:
9564     case OMPD_threadprivate:
9565     case OMPD_allocate:
9566     case OMPD_task:
9567     case OMPD_simd:
9568     case OMPD_sections:
9569     case OMPD_section:
9570     case OMPD_single:
9571     case OMPD_master:
9572     case OMPD_critical:
9573     case OMPD_taskyield:
9574     case OMPD_barrier:
9575     case OMPD_taskwait:
9576     case OMPD_taskgroup:
9577     case OMPD_atomic:
9578     case OMPD_flush:
9579     case OMPD_depobj:
9580     case OMPD_scan:
9581     case OMPD_teams:
9582     case OMPD_target_data:
9583     case OMPD_target_exit_data:
9584     case OMPD_target_enter_data:
9585     case OMPD_distribute:
9586     case OMPD_distribute_simd:
9587     case OMPD_distribute_parallel_for:
9588     case OMPD_distribute_parallel_for_simd:
9589     case OMPD_teams_distribute:
9590     case OMPD_teams_distribute_simd:
9591     case OMPD_teams_distribute_parallel_for:
9592     case OMPD_teams_distribute_parallel_for_simd:
9593     case OMPD_target_update:
9594     case OMPD_declare_simd:
9595     case OMPD_declare_variant:
9596     case OMPD_begin_declare_variant:
9597     case OMPD_end_declare_variant:
9598     case OMPD_declare_target:
9599     case OMPD_end_declare_target:
9600     case OMPD_declare_reduction:
9601     case OMPD_declare_mapper:
9602     case OMPD_taskloop:
9603     case OMPD_taskloop_simd:
9604     case OMPD_master_taskloop:
9605     case OMPD_master_taskloop_simd:
9606     case OMPD_parallel_master_taskloop:
9607     case OMPD_parallel_master_taskloop_simd:
9608     case OMPD_requires:
9609     case OMPD_unknown:
9610       llvm_unreachable("Unexpected directive.");
9611     }
9612   }
9613 
9614   return nullptr;
9615 }
9616 
9617 /// Emit the user-defined mapper function. The code generation follows the
9618 /// pattern in the example below.
9619 /// \code
9620 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9621 ///                                           void *base, void *begin,
9622 ///                                           int64_t size, int64_t type) {
9623 ///   // Allocate space for an array section first.
9624 ///   if (size > 1 && !maptype.IsDelete)
9625 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9626 ///                                 size*sizeof(Ty), clearToFrom(type));
9627 ///   // Map members.
9628 ///   for (unsigned i = 0; i < size; i++) {
9629 ///     // For each component specified by this mapper:
9630 ///     for (auto c : all_components) {
9631 ///       if (c.hasMapper())
9632 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9633 ///                       c.arg_type);
9634 ///       else
9635 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9636 ///                                     c.arg_begin, c.arg_size, c.arg_type);
9637 ///     }
9638 ///   }
9639 ///   // Delete the array section.
9640 ///   if (size > 1 && maptype.IsDelete)
9641 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9642 ///                                 size*sizeof(Ty), clearToFrom(type));
9643 /// }
9644 /// \endcode
9645 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9646                                             CodeGenFunction *CGF) {
9647   if (UDMMap.count(D) > 0)
9648     return;
9649   ASTContext &C = CGM.getContext();
9650   QualType Ty = D->getType();
9651   QualType PtrTy = C.getPointerType(Ty).withRestrict();
9652   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
9653   auto *MapperVarDecl =
9654       cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9655   SourceLocation Loc = D->getLocation();
9656   CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9657 
9658   // Prepare mapper function arguments and attributes.
9659   ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9660                               C.VoidPtrTy, ImplicitParamDecl::Other);
9661   ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9662                             ImplicitParamDecl::Other);
9663   ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9664                              C.VoidPtrTy, ImplicitParamDecl::Other);
9665   ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9666                             ImplicitParamDecl::Other);
9667   ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9668                             ImplicitParamDecl::Other);
9669   FunctionArgList Args;
9670   Args.push_back(&HandleArg);
9671   Args.push_back(&BaseArg);
9672   Args.push_back(&BeginArg);
9673   Args.push_back(&SizeArg);
9674   Args.push_back(&TypeArg);
9675   const CGFunctionInfo &FnInfo =
9676       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
9677   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
9678   SmallString<64> TyStr;
9679   llvm::raw_svector_ostream Out(TyStr);
9680   CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
9681   std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9682   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
9683                                     Name, &CGM.getModule());
9684   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
9685   Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
9686   // Start the mapper function code generation.
9687   CodeGenFunction MapperCGF(CGM);
9688   MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
9689   // Compute the starting and end addreses of array elements.
9690   llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
9691       MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
9692       C.getPointerType(Int64Ty), Loc);
9693   llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
9694       MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
9695       CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
9696   llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
9697   llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
9698       MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
9699       C.getPointerType(Int64Ty), Loc);
9700   // Prepare common arguments for array initiation and deletion.
9701   llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
9702       MapperCGF.GetAddrOfLocalVar(&HandleArg),
9703       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9704   llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
9705       MapperCGF.GetAddrOfLocalVar(&BaseArg),
9706       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9707   llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
9708       MapperCGF.GetAddrOfLocalVar(&BeginArg),
9709       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9710 
9711   // Emit array initiation if this is an array section and \p MapType indicates
9712   // that memory allocation is required.
9713   llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
9714   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9715                              ElementSize, HeadBB, /*IsInit=*/true);
9716 
9717   // Emit a for loop to iterate through SizeArg of elements and map all of them.
9718 
9719   // Emit the loop header block.
9720   MapperCGF.EmitBlock(HeadBB);
9721   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
9722   llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
9723   // Evaluate whether the initial condition is satisfied.
9724   llvm::Value *IsEmpty =
9725       MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
9726   MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9727   llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
9728 
9729   // Emit the loop body block.
9730   MapperCGF.EmitBlock(BodyBB);
9731   llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
9732       PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
9733   PtrPHI->addIncoming(PtrBegin, EntryBB);
9734   Address PtrCurrent =
9735       Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
9736                           .getAlignment()
9737                           .alignmentOfArrayElement(ElementSize));
9738   // Privatize the declared variable of mapper to be the current array element.
9739   CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9740   Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
9741     return MapperCGF
9742         .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
9743         .getAddress(MapperCGF);
9744   });
9745   (void)Scope.Privatize();
9746 
9747   // Get map clause information. Fill up the arrays with all mapped variables.
9748   MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
9749   MappableExprsHandler::MapValuesArrayTy Pointers;
9750   MappableExprsHandler::MapValuesArrayTy Sizes;
9751   MappableExprsHandler::MapFlagsArrayTy MapTypes;
9752   MappableExprsHandler MEHandler(*D, MapperCGF);
9753   MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes);
9754 
9755   // Call the runtime API __tgt_mapper_num_components to get the number of
9756   // pre-existing components.
9757   llvm::Value *OffloadingArgs[] = {Handle};
9758   llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
9759       createRuntimeFunction(OMPRTL__tgt_mapper_num_components), OffloadingArgs);
9760   llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
9761       PreviousSize,
9762       MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
9763 
9764   // Fill up the runtime mapper handle for all components.
9765   for (unsigned I = 0; I < BasePointers.size(); ++I) {
9766     llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
9767         *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9768     llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
9769         Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9770     llvm::Value *CurSizeArg = Sizes[I];
9771 
9772     // Extract the MEMBER_OF field from the map type.
9773     llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
9774     MapperCGF.EmitBlock(MemberBB);
9775     llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]);
9776     llvm::Value *Member = MapperCGF.Builder.CreateAnd(
9777         OriMapType,
9778         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
9779     llvm::BasicBlock *MemberCombineBB =
9780         MapperCGF.createBasicBlock("omp.member.combine");
9781     llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
9782     llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
9783     MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
9784     // Add the number of pre-existing components to the MEMBER_OF field if it
9785     // is valid.
9786     MapperCGF.EmitBlock(MemberCombineBB);
9787     llvm::Value *CombinedMember =
9788         MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9789     // Do nothing if it is not a member of previous components.
9790     MapperCGF.EmitBlock(TypeBB);
9791     llvm::PHINode *MemberMapType =
9792         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
9793     MemberMapType->addIncoming(OriMapType, MemberBB);
9794     MemberMapType->addIncoming(CombinedMember, MemberCombineBB);
9795 
9796     // Combine the map type inherited from user-defined mapper with that
9797     // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
9798     // bits of the \a MapType, which is the input argument of the mapper
9799     // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
9800     // bits of MemberMapType.
9801     // [OpenMP 5.0], 1.2.6. map-type decay.
9802     //        | alloc |  to   | from  | tofrom | release | delete
9803     // ----------------------------------------------------------
9804     // alloc  | alloc | alloc | alloc | alloc  | release | delete
9805     // to     | alloc |  to   | alloc |   to   | release | delete
9806     // from   | alloc | alloc | from  |  from  | release | delete
9807     // tofrom | alloc |  to   | from  | tofrom | release | delete
9808     llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
9809         MapType,
9810         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
9811                                    MappableExprsHandler::OMP_MAP_FROM));
9812     llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
9813     llvm::BasicBlock *AllocElseBB =
9814         MapperCGF.createBasicBlock("omp.type.alloc.else");
9815     llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
9816     llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
9817     llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
9818     llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
9819     llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
9820     MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9821     // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
9822     MapperCGF.EmitBlock(AllocBB);
9823     llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
9824         MemberMapType,
9825         MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9826                                      MappableExprsHandler::OMP_MAP_FROM)));
9827     MapperCGF.Builder.CreateBr(EndBB);
9828     MapperCGF.EmitBlock(AllocElseBB);
9829     llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
9830         LeftToFrom,
9831         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
9832     MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9833     // In case of to, clear OMP_MAP_FROM.
9834     MapperCGF.EmitBlock(ToBB);
9835     llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
9836         MemberMapType,
9837         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
9838     MapperCGF.Builder.CreateBr(EndBB);
9839     MapperCGF.EmitBlock(ToElseBB);
9840     llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
9841         LeftToFrom,
9842         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
9843     MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9844     // In case of from, clear OMP_MAP_TO.
9845     MapperCGF.EmitBlock(FromBB);
9846     llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
9847         MemberMapType,
9848         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
9849     // In case of tofrom, do nothing.
9850     MapperCGF.EmitBlock(EndBB);
9851     llvm::PHINode *CurMapType =
9852         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
9853     CurMapType->addIncoming(AllocMapType, AllocBB);
9854     CurMapType->addIncoming(ToMapType, ToBB);
9855     CurMapType->addIncoming(FromMapType, FromBB);
9856     CurMapType->addIncoming(MemberMapType, ToElseBB);
9857 
9858     // TODO: call the corresponding mapper function if a user-defined mapper is
9859     // associated with this map clause.
9860     // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9861     // data structure.
9862     llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
9863                                      CurSizeArg, CurMapType};
9864     MapperCGF.EmitRuntimeCall(
9865         createRuntimeFunction(OMPRTL__tgt_push_mapper_component),
9866         OffloadingArgs);
9867   }
9868 
9869   // Update the pointer to point to the next element that needs to be mapped,
9870   // and check whether we have mapped all elements.
9871   llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
9872       PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
9873   PtrPHI->addIncoming(PtrNext, BodyBB);
9874   llvm::Value *IsDone =
9875       MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9876   llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
9877   MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9878 
9879   MapperCGF.EmitBlock(ExitBB);
9880   // Emit array deletion if this is an array section and \p MapType indicates
9881   // that deletion is required.
9882   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9883                              ElementSize, DoneBB, /*IsInit=*/false);
9884 
9885   // Emit the function exit block.
9886   MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9887   MapperCGF.FinishFunction();
9888   UDMMap.try_emplace(D, Fn);
9889   if (CGF) {
9890     auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
9891     Decls.second.push_back(D);
9892   }
9893 }
9894 
9895 /// Emit the array initialization or deletion portion for user-defined mapper
9896 /// code generation. First, it evaluates whether an array section is mapped and
9897 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9898 /// true, and \a MapType indicates to not delete this array, array
9899 /// initialization code is generated. If \a IsInit is false, and \a MapType
9900 /// indicates to not this array, array deletion code is generated.
9901 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9902     CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9903     llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9904     CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
9905   StringRef Prefix = IsInit ? ".init" : ".del";
9906 
9907   // Evaluate if this is an array section.
9908   llvm::BasicBlock *IsDeleteBB =
9909       MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"}));
9910   llvm::BasicBlock *BodyBB =
9911       MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
9912   llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
9913       Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9914   MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);
9915 
9916   // Evaluate if we are going to delete this section.
9917   MapperCGF.EmitBlock(IsDeleteBB);
9918   llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9919       MapType,
9920       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
9921   llvm::Value *DeleteCond;
9922   if (IsInit) {
9923     DeleteCond = MapperCGF.Builder.CreateIsNull(
9924         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9925   } else {
9926     DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9927         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9928   }
9929   MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);
9930 
9931   MapperCGF.EmitBlock(BodyBB);
9932   // Get the array size by multiplying element size and element number (i.e., \p
9933   // Size).
9934   llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9935       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9936   // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
9937   // memory allocation/deletion purpose only.
9938   llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
9939       MapType,
9940       MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9941                                    MappableExprsHandler::OMP_MAP_FROM)));
9942   // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9943   // data structure.
9944   llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
9945   MapperCGF.EmitRuntimeCall(
9946       createRuntimeFunction(OMPRTL__tgt_push_mapper_component), OffloadingArgs);
9947 }
9948 
9949 void CGOpenMPRuntime::emitTargetNumIterationsCall(
9950     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9951     llvm::Value *DeviceID,
9952     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9953                                      const OMPLoopDirective &D)>
9954         SizeEmitter) {
9955   OpenMPDirectiveKind Kind = D.getDirectiveKind();
9956   const OMPExecutableDirective *TD = &D;
9957   // Get nested teams distribute kind directive, if any.
9958   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
9959     TD = getNestedDistributeDirective(CGM.getContext(), D);
9960   if (!TD)
9961     return;
9962   const auto *LD = cast<OMPLoopDirective>(TD);
9963   auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF,
9964                                                      PrePostActionTy &) {
9965     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
9966       llvm::Value *Args[] = {DeviceID, NumIterations};
9967       CGF.EmitRuntimeCall(
9968           createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
9969     }
9970   };
9971   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
9972 }
9973 
9974 void CGOpenMPRuntime::emitTargetCall(
9975     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9976     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
9977     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9978     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9979                                      const OMPLoopDirective &D)>
9980         SizeEmitter) {
9981   if (!CGF.HaveInsertPoint())
9982     return;
9983 
9984   assert(OutlinedFn && "Invalid outlined function!");
9985 
9986   const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
9987   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
9988   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
9989   auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
9990                                             PrePostActionTy &) {
9991     CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9992   };
9993   emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
9994 
9995   CodeGenFunction::OMPTargetDataInfo InputInfo;
9996   llvm::Value *MapTypesArray = nullptr;
9997   // Fill up the pointer arrays and transfer execution to the device.
9998   auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
9999                     &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
10000                     SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
10001     if (Device.getInt() == OMPC_DEVICE_ancestor) {
10002       // Reverse offloading is not supported, so just execute on the host.
10003       if (RequiresOuterTask) {
10004         CapturedVars.clear();
10005         CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10006       }
10007       emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10008       return;
10009     }
10010 
10011     // On top of the arrays that were filled up, the target offloading call
10012     // takes as arguments the device id as well as the host pointer. The host
10013     // pointer is used by the runtime library to identify the current target
10014     // region, so it only has to be unique and not necessarily point to
10015     // anything. It could be the pointer to the outlined function that
10016     // implements the target region, but we aren't using that so that the
10017     // compiler doesn't need to keep that, and could therefore inline the host
10018     // function if proven worthwhile during optimization.
10019 
10020     // From this point on, we need to have an ID of the target region defined.
10021     assert(OutlinedFnID && "Invalid outlined function ID!");
10022 
10023     // Emit device ID if any.
10024     llvm::Value *DeviceID;
10025     if (Device.getPointer()) {
10026       assert((Device.getInt() == OMPC_DEVICE_unknown ||
10027               Device.getInt() == OMPC_DEVICE_device_num) &&
10028              "Expected device_num modifier.");
10029       llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
10030       DeviceID =
10031           CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
10032     } else {
10033       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10034     }
10035 
10036     // Emit the number of elements in the offloading arrays.
10037     llvm::Value *PointerNum =
10038         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10039 
10040     // Return value of the runtime offloading call.
10041     llvm::Value *Return;
10042 
10043     llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
10044     llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
10045 
10046     // Emit tripcount for the target loop-based directive.
10047     emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
10048 
10049     bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10050     // The target region is an outlined function launched by the runtime
10051     // via calls __tgt_target() or __tgt_target_teams().
10052     //
10053     // __tgt_target() launches a target region with one team and one thread,
10054     // executing a serial region.  This master thread may in turn launch
10055     // more threads within its team upon encountering a parallel region,
10056     // however, no additional teams can be launched on the device.
10057     //
10058     // __tgt_target_teams() launches a target region with one or more teams,
10059     // each with one or more threads.  This call is required for target
10060     // constructs such as:
10061     //  'target teams'
10062     //  'target' / 'teams'
10063     //  'target teams distribute parallel for'
10064     //  'target parallel'
10065     // and so on.
10066     //
10067     // Note that on the host and CPU targets, the runtime implementation of
10068     // these calls simply call the outlined function without forking threads.
10069     // The outlined functions themselves have runtime calls to
10070     // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
10071     // the compiler in emitTeamsCall() and emitParallelCall().
10072     //
10073     // In contrast, on the NVPTX target, the implementation of
10074     // __tgt_target_teams() launches a GPU kernel with the requested number
10075     // of teams and threads so no additional calls to the runtime are required.
10076     if (NumTeams) {
10077       // If we have NumTeams defined this means that we have an enclosed teams
10078       // region. Therefore we also expect to have NumThreads defined. These two
10079       // values should be defined in the presence of a teams directive,
10080       // regardless of having any clauses associated. If the user is using teams
10081       // but no clauses, these two values will be the default that should be
10082       // passed to the runtime library - a 32-bit integer with the value zero.
10083       assert(NumThreads && "Thread limit expression should be available along "
10084                            "with number of teams.");
10085       llvm::Value *OffloadingArgs[] = {DeviceID,
10086                                        OutlinedFnID,
10087                                        PointerNum,
10088                                        InputInfo.BasePointersArray.getPointer(),
10089                                        InputInfo.PointersArray.getPointer(),
10090                                        InputInfo.SizesArray.getPointer(),
10091                                        MapTypesArray,
10092                                        NumTeams,
10093                                        NumThreads};
10094       Return = CGF.EmitRuntimeCall(
10095           createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
10096                                           : OMPRTL__tgt_target_teams),
10097           OffloadingArgs);
10098     } else {
10099       llvm::Value *OffloadingArgs[] = {DeviceID,
10100                                        OutlinedFnID,
10101                                        PointerNum,
10102                                        InputInfo.BasePointersArray.getPointer(),
10103                                        InputInfo.PointersArray.getPointer(),
10104                                        InputInfo.SizesArray.getPointer(),
10105                                        MapTypesArray};
10106       Return = CGF.EmitRuntimeCall(
10107           createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
10108                                           : OMPRTL__tgt_target),
10109           OffloadingArgs);
10110     }
10111 
10112     // Check the error code and execute the host version if required.
10113     llvm::BasicBlock *OffloadFailedBlock =
10114         CGF.createBasicBlock("omp_offload.failed");
10115     llvm::BasicBlock *OffloadContBlock =
10116         CGF.createBasicBlock("omp_offload.cont");
10117     llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
10118     CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
10119 
10120     CGF.EmitBlock(OffloadFailedBlock);
10121     if (RequiresOuterTask) {
10122       CapturedVars.clear();
10123       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10124     }
10125     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10126     CGF.EmitBranch(OffloadContBlock);
10127 
10128     CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
10129   };
10130 
10131   // Notify that the host version must be executed.
10132   auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
10133                     RequiresOuterTask](CodeGenFunction &CGF,
10134                                        PrePostActionTy &) {
10135     if (RequiresOuterTask) {
10136       CapturedVars.clear();
10137       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10138     }
10139     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10140   };
10141 
10142   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10143                           &CapturedVars, RequiresOuterTask,
10144                           &CS](CodeGenFunction &CGF, PrePostActionTy &) {
10145     // Fill up the arrays with all the captured variables.
10146     MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
10147     MappableExprsHandler::MapValuesArrayTy Pointers;
10148     MappableExprsHandler::MapValuesArrayTy Sizes;
10149     MappableExprsHandler::MapFlagsArrayTy MapTypes;
10150 
10151     // Get mappable expression information.
10152     MappableExprsHandler MEHandler(D, CGF);
10153     llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
10154 
10155     auto RI = CS.getCapturedRecordDecl()->field_begin();
10156     auto CV = CapturedVars.begin();
10157     for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
10158                                               CE = CS.capture_end();
10159          CI != CE; ++CI, ++RI, ++CV) {
10160       MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
10161       MappableExprsHandler::MapValuesArrayTy CurPointers;
10162       MappableExprsHandler::MapValuesArrayTy CurSizes;
10163       MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
10164       MappableExprsHandler::StructRangeInfoTy PartialStruct;
10165 
10166       // VLA sizes are passed to the outlined region by copy and do not have map
10167       // information associated.
10168       if (CI->capturesVariableArrayType()) {
10169         CurBasePointers.push_back(*CV);
10170         CurPointers.push_back(*CV);
10171         CurSizes.push_back(CGF.Builder.CreateIntCast(
10172             CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
10173         // Copy to the device as an argument. No need to retrieve it.
10174         CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
10175                               MappableExprsHandler::OMP_MAP_TARGET_PARAM |
10176                               MappableExprsHandler::OMP_MAP_IMPLICIT);
10177       } else {
10178         // If we have any information in the map clause, we use it, otherwise we
10179         // just do a default mapping.
10180         MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
10181                                          CurSizes, CurMapTypes, PartialStruct);
10182         if (CurBasePointers.empty())
10183           MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
10184                                            CurPointers, CurSizes, CurMapTypes);
10185         // Generate correct mapping for variables captured by reference in
10186         // lambdas.
10187         if (CI->capturesVariable())
10188           MEHandler.generateInfoForLambdaCaptures(
10189               CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
10190               CurMapTypes, LambdaPointers);
10191       }
10192       // We expect to have at least an element of information for this capture.
10193       assert(!CurBasePointers.empty() &&
10194              "Non-existing map pointer for capture!");
10195       assert(CurBasePointers.size() == CurPointers.size() &&
10196              CurBasePointers.size() == CurSizes.size() &&
10197              CurBasePointers.size() == CurMapTypes.size() &&
10198              "Inconsistent map information sizes!");
10199 
10200       // If there is an entry in PartialStruct it means we have a struct with
10201       // individual members mapped. Emit an extra combined entry.
10202       if (PartialStruct.Base.isValid())
10203         MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
10204                                     CurMapTypes, PartialStruct);
10205 
10206       // We need to append the results of this capture to what we already have.
10207       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
10208       Pointers.append(CurPointers.begin(), CurPointers.end());
10209       Sizes.append(CurSizes.begin(), CurSizes.end());
10210       MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
10211     }
10212     // Adjust MEMBER_OF flags for the lambdas captures.
10213     MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
10214                                               Pointers, MapTypes);
10215     // Map other list items in the map clause which are not captured variables
10216     // but "declare target link" global variables.
10217     MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
10218                                                MapTypes);
10219 
10220     TargetDataInfo Info;
10221     // Fill up the arrays and create the arguments.
10222     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
10223     emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
10224                                  Info.PointersArray, Info.SizesArray,
10225                                  Info.MapTypesArray, Info);
10226     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10227     InputInfo.BasePointersArray =
10228         Address(Info.BasePointersArray, CGM.getPointerAlign());
10229     InputInfo.PointersArray =
10230         Address(Info.PointersArray, CGM.getPointerAlign());
10231     InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
10232     MapTypesArray = Info.MapTypesArray;
10233     if (RequiresOuterTask)
10234       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10235     else
10236       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10237   };
10238 
10239   auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
10240                              CodeGenFunction &CGF, PrePostActionTy &) {
10241     if (RequiresOuterTask) {
10242       CodeGenFunction::OMPTargetDataInfo InputInfo;
10243       CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
10244     } else {
10245       emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
10246     }
10247   };
10248 
10249   // If we have a target function ID it means that we need to support
10250   // offloading, otherwise, just execute on the host. We need to execute on host
10251   // regardless of the conditional in the if clause if, e.g., the user do not
10252   // specify target triples.
10253   if (OutlinedFnID) {
10254     if (IfCond) {
10255       emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
10256     } else {
10257       RegionCodeGenTy ThenRCG(TargetThenGen);
10258       ThenRCG(CGF);
10259     }
10260   } else {
10261     RegionCodeGenTy ElseRCG(TargetElseGen);
10262     ElseRCG(CGF);
10263   }
10264 }
10265 
10266 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
10267                                                     StringRef ParentName) {
10268   if (!S)
10269     return;
10270 
10271   // Codegen OMP target directives that offload compute to the device.
10272   bool RequiresDeviceCodegen =
10273       isa<OMPExecutableDirective>(S) &&
10274       isOpenMPTargetExecutionDirective(
10275           cast<OMPExecutableDirective>(S)->getDirectiveKind());
10276 
10277   if (RequiresDeviceCodegen) {
10278     const auto &E = *cast<OMPExecutableDirective>(S);
10279     unsigned DeviceID;
10280     unsigned FileID;
10281     unsigned Line;
10282     getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
10283                              FileID, Line);
10284 
10285     // Is this a target region that should not be emitted as an entry point? If
10286     // so just signal we are done with this target region.
10287     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
10288                                                             ParentName, Line))
10289       return;
10290 
10291     switch (E.getDirectiveKind()) {
10292     case OMPD_target:
10293       CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
10294                                                    cast<OMPTargetDirective>(E));
10295       break;
10296     case OMPD_target_parallel:
10297       CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
10298           CGM, ParentName, cast<OMPTargetParallelDirective>(E));
10299       break;
10300     case OMPD_target_teams:
10301       CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
10302           CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
10303       break;
10304     case OMPD_target_teams_distribute:
10305       CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
10306           CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
10307       break;
10308     case OMPD_target_teams_distribute_simd:
10309       CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
10310           CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
10311       break;
10312     case OMPD_target_parallel_for:
10313       CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
10314           CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
10315       break;
10316     case OMPD_target_parallel_for_simd:
10317       CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
10318           CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
10319       break;
10320     case OMPD_target_simd:
10321       CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
10322           CGM, ParentName, cast<OMPTargetSimdDirective>(E));
10323       break;
10324     case OMPD_target_teams_distribute_parallel_for:
10325       CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
10326           CGM, ParentName,
10327           cast<OMPTargetTeamsDistributeParallelForDirective>(E));
10328       break;
10329     case OMPD_target_teams_distribute_parallel_for_simd:
10330       CodeGenFunction::
10331           EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
10332               CGM, ParentName,
10333               cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
10334       break;
10335     case OMPD_parallel:
10336     case OMPD_for:
10337     case OMPD_parallel_for:
10338     case OMPD_parallel_master:
10339     case OMPD_parallel_sections:
10340     case OMPD_for_simd:
10341     case OMPD_parallel_for_simd:
10342     case OMPD_cancel:
10343     case OMPD_cancellation_point:
10344     case OMPD_ordered:
10345     case OMPD_threadprivate:
10346     case OMPD_allocate:
10347     case OMPD_task:
10348     case OMPD_simd:
10349     case OMPD_sections:
10350     case OMPD_section:
10351     case OMPD_single:
10352     case OMPD_master:
10353     case OMPD_critical:
10354     case OMPD_taskyield:
10355     case OMPD_barrier:
10356     case OMPD_taskwait:
10357     case OMPD_taskgroup:
10358     case OMPD_atomic:
10359     case OMPD_flush:
10360     case OMPD_depobj:
10361     case OMPD_scan:
10362     case OMPD_teams:
10363     case OMPD_target_data:
10364     case OMPD_target_exit_data:
10365     case OMPD_target_enter_data:
10366     case OMPD_distribute:
10367     case OMPD_distribute_simd:
10368     case OMPD_distribute_parallel_for:
10369     case OMPD_distribute_parallel_for_simd:
10370     case OMPD_teams_distribute:
10371     case OMPD_teams_distribute_simd:
10372     case OMPD_teams_distribute_parallel_for:
10373     case OMPD_teams_distribute_parallel_for_simd:
10374     case OMPD_target_update:
10375     case OMPD_declare_simd:
10376     case OMPD_declare_variant:
10377     case OMPD_begin_declare_variant:
10378     case OMPD_end_declare_variant:
10379     case OMPD_declare_target:
10380     case OMPD_end_declare_target:
10381     case OMPD_declare_reduction:
10382     case OMPD_declare_mapper:
10383     case OMPD_taskloop:
10384     case OMPD_taskloop_simd:
10385     case OMPD_master_taskloop:
10386     case OMPD_master_taskloop_simd:
10387     case OMPD_parallel_master_taskloop:
10388     case OMPD_parallel_master_taskloop_simd:
10389     case OMPD_requires:
10390     case OMPD_unknown:
10391       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
10392     }
10393     return;
10394   }
10395 
10396   if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
10397     if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
10398       return;
10399 
10400     scanForTargetRegionsFunctions(
10401         E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
10402     return;
10403   }
10404 
10405   // If this is a lambda function, look into its body.
10406   if (const auto *L = dyn_cast<LambdaExpr>(S))
10407     S = L->getBody();
10408 
10409   // Keep looking for target regions recursively.
10410   for (const Stmt *II : S->children())
10411     scanForTargetRegionsFunctions(II, ParentName);
10412 }
10413 
10414 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10415   // If emitting code for the host, we do not process FD here. Instead we do
10416   // the normal code generation.
10417   if (!CGM.getLangOpts().OpenMPIsDevice) {
10418     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
10419       Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10420           OMPDeclareTargetDeclAttr::getDeviceType(FD);
10421       // Do not emit device_type(nohost) functions for the host.
10422       if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10423         return true;
10424     }
10425     return false;
10426   }
10427 
10428   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10429   // Try to detect target regions in the function.
10430   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10431     StringRef Name = CGM.getMangledName(GD);
10432     scanForTargetRegionsFunctions(FD->getBody(), Name);
10433     Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10434         OMPDeclareTargetDeclAttr::getDeviceType(FD);
10435     // Do not emit device_type(nohost) functions for the host.
10436     if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10437       return true;
10438   }
10439 
10440   // Do not to emit function if it is not marked as declare target.
10441   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10442          AlreadyEmittedTargetDecls.count(VD) == 0;
10443 }
10444 
10445 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10446   if (!CGM.getLangOpts().OpenMPIsDevice)
10447     return false;
10448 
10449   // Check if there are Ctors/Dtors in this declaration and look for target
10450   // regions in it. We use the complete variant to produce the kernel name
10451   // mangling.
10452   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10453   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10454     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10455       StringRef ParentName =
10456           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10457       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10458     }
10459     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10460       StringRef ParentName =
10461           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10462       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10463     }
10464   }
10465 
10466   // Do not to emit variable if it is not marked as declare target.
10467   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10468       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10469           cast<VarDecl>(GD.getDecl()));
10470   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10471       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10472        HasRequiresUnifiedSharedMemory)) {
10473     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10474     return true;
10475   }
10476   return false;
10477 }
10478 
10479 llvm::Constant *
10480 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
10481                                                 const VarDecl *VD) {
10482   assert(VD->getType().isConstant(CGM.getContext()) &&
10483          "Expected constant variable.");
10484   StringRef VarName;
10485   llvm::Constant *Addr;
10486   llvm::GlobalValue::LinkageTypes Linkage;
10487   QualType Ty = VD->getType();
10488   SmallString<128> Buffer;
10489   {
10490     unsigned DeviceID;
10491     unsigned FileID;
10492     unsigned Line;
10493     getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
10494                              FileID, Line);
10495     llvm::raw_svector_ostream OS(Buffer);
10496     OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
10497        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
10498     VarName = OS.str();
10499   }
10500   Linkage = llvm::GlobalValue::InternalLinkage;
10501   Addr =
10502       getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
10503                                   getDefaultFirstprivateAddressSpace());
10504   cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
10505   CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
10506   CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
10507   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10508       VarName, Addr, VarSize,
10509       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
10510   return Addr;
10511 }
10512 
10513 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10514                                                    llvm::Constant *Addr) {
10515   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10516       !CGM.getLangOpts().OpenMPIsDevice)
10517     return;
10518   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10519       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10520   if (!Res) {
10521     if (CGM.getLangOpts().OpenMPIsDevice) {
10522       // Register non-target variables being emitted in device code (debug info
10523       // may cause this).
10524       StringRef VarName = CGM.getMangledName(VD);
10525       EmittedNonTargetVariables.try_emplace(VarName, Addr);
10526     }
10527     return;
10528   }
10529   // Register declare target variables.
10530   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
10531   StringRef VarName;
10532   CharUnits VarSize;
10533   llvm::GlobalValue::LinkageTypes Linkage;
10534 
10535   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10536       !HasRequiresUnifiedSharedMemory) {
10537     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10538     VarName = CGM.getMangledName(VD);
10539     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
10540       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
10541       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
10542     } else {
10543       VarSize = CharUnits::Zero();
10544     }
10545     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
10546     // Temp solution to prevent optimizations of the internal variables.
10547     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
10548       std::string RefName = getName({VarName, "ref"});
10549       if (!CGM.GetGlobalValue(RefName)) {
10550         llvm::Constant *AddrRef =
10551             getOrCreateInternalVariable(Addr->getType(), RefName);
10552         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
10553         GVAddrRef->setConstant(/*Val=*/true);
10554         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
10555         GVAddrRef->setInitializer(Addr);
10556         CGM.addCompilerUsedGlobal(GVAddrRef);
10557       }
10558     }
10559   } else {
10560     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
10561             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10562              HasRequiresUnifiedSharedMemory)) &&
10563            "Declare target attribute must link or to with unified memory.");
10564     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
10565       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
10566     else
10567       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10568 
10569     if (CGM.getLangOpts().OpenMPIsDevice) {
10570       VarName = Addr->getName();
10571       Addr = nullptr;
10572     } else {
10573       VarName = getAddrOfDeclareTargetVar(VD).getName();
10574       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
10575     }
10576     VarSize = CGM.getPointerSize();
10577     Linkage = llvm::GlobalValue::WeakAnyLinkage;
10578   }
10579 
10580   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10581       VarName, Addr, VarSize, Flags, Linkage);
10582 }
10583 
10584 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10585   if (isa<FunctionDecl>(GD.getDecl()) ||
10586       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10587     return emitTargetFunctions(GD);
10588 
10589   return emitTargetGlobalVariable(GD);
10590 }
10591 
10592 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10593   for (const VarDecl *VD : DeferredGlobalVariables) {
10594     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10595         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10596     if (!Res)
10597       continue;
10598     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10599         !HasRequiresUnifiedSharedMemory) {
10600       CGM.EmitGlobal(VD);
10601     } else {
10602       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10603               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10604                HasRequiresUnifiedSharedMemory)) &&
10605              "Expected link clause or to clause with unified memory.");
10606       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10607     }
10608   }
10609 }
10610 
/// Hook for adjusting target-specific data for lambdas used in the target
/// directive \p D.
///
/// This implementation emits nothing: it only checks, in builds with
/// assertions enabled, that \p D is a target execution directive. \p CGF is
/// unused here.
// NOTE(review): presumably device-specific runtimes override this - confirm
// against the class declaration.
void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}
10616 
10617 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10618   for (const OMPClause *Clause : D->clauselists()) {
10619     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10620       HasRequiresUnifiedSharedMemory = true;
10621     } else if (const auto *AC =
10622                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10623       switch (AC->getAtomicDefaultMemOrderKind()) {
10624       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10625         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10626         break;
10627       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10628         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10629         break;
10630       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10631         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10632         break;
10633       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10634         break;
10635       }
10636     }
10637   }
10638 }
10639 
/// Returns the atomic ordering currently stored in RequiresAtomicOrdering.
/// processRequiresDirective() updates that member when it sees an
/// 'atomic_default_mem_order' clause.
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
10643 
10644 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10645                                                        LangAS &AS) {
10646   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10647     return false;
10648   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10649   switch(A->getAllocatorType()) {
10650   case OMPAllocateDeclAttr::OMPNullMemAlloc:
10651   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10652   // Not supported, fallback to the default mem space.
10653   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10654   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10655   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10656   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10657   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10658   case OMPAllocateDeclAttr::OMPConstMemAlloc:
10659   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10660     AS = LangAS::Default;
10661     return true;
10662   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10663     llvm_unreachable("Expected predefined allocator for the variables with the "
10664                      "static storage.");
10665   }
10666   return false;
10667 }
10668 
/// Returns the HasRequiresUnifiedSharedMemory flag, which
/// processRequiresDirective() sets when a 'requires' directive carries the
/// 'unified_shared_memory' clause.
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
10672 
10673 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10674     CodeGenModule &CGM)
10675     : CGM(CGM) {
10676   if (CGM.getLangOpts().OpenMPIsDevice) {
10677     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10678     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10679   }
10680 }
10681 
10682 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10683   if (CGM.getLangOpts().OpenMPIsDevice)
10684     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10685 }
10686 
10687 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10688   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
10689     return true;
10690 
10691   const auto *D = cast<FunctionDecl>(GD.getDecl());
10692   // Do not to emit function if it is marked as declare target as it was already
10693   // emitted.
10694   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10695     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10696       if (auto *F = dyn_cast_or_null<llvm::Function>(
10697               CGM.GetGlobalValue(CGM.getMangledName(GD))))
10698         return !F->isDeclaration();
10699       return false;
10700     }
10701     return true;
10702   }
10703 
10704   return !AlreadyEmittedTargetDecls.insert(D).second;
10705 }
10706 
10707 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
10708   // If we don't have entries or if we are emitting code for the device, we
10709   // don't need to do anything.
10710   if (CGM.getLangOpts().OMPTargetTriples.empty() ||
10711       CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
10712       (OffloadEntriesInfoManager.empty() &&
10713        !HasEmittedDeclareTargetRegion &&
10714        !HasEmittedTargetRegion))
10715     return nullptr;
10716 
10717   // Create and register the function that handles the requires directives.
10718   ASTContext &C = CGM.getContext();
10719 
10720   llvm::Function *RequiresRegFn;
10721   {
10722     CodeGenFunction CGF(CGM);
10723     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
10724     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
10725     std::string ReqName = getName({"omp_offloading", "requires_reg"});
10726     RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI);
10727     CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
10728     OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
10729     // TODO: check for other requires clauses.
10730     // The requires directive takes effect only when a target region is
10731     // present in the compilation unit. Otherwise it is ignored and not
10732     // passed to the runtime. This avoids the runtime from throwing an error
10733     // for mismatching requires clauses across compilation units that don't
10734     // contain at least 1 target region.
10735     assert((HasEmittedTargetRegion ||
10736             HasEmittedDeclareTargetRegion ||
10737             !OffloadEntriesInfoManager.empty()) &&
10738            "Target or declare target region expected.");
10739     if (HasRequiresUnifiedSharedMemory)
10740       Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
10741     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires),
10742         llvm::ConstantInt::get(CGM.Int64Ty, Flags));
10743     CGF.FinishFunction();
10744   }
10745   return RequiresRegFn;
10746 }
10747 
10748 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10749                                     const OMPExecutableDirective &D,
10750                                     SourceLocation Loc,
10751                                     llvm::Function *OutlinedFn,
10752                                     ArrayRef<llvm::Value *> CapturedVars) {
10753   if (!CGF.HaveInsertPoint())
10754     return;
10755 
10756   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10757   CodeGenFunction::RunCleanupsScope Scope(CGF);
10758 
10759   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10760   llvm::Value *Args[] = {
10761       RTLoc,
10762       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10763       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10764   llvm::SmallVector<llvm::Value *, 16> RealArgs;
10765   RealArgs.append(std::begin(Args), std::end(Args));
10766   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10767 
10768   llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
10769   CGF.EmitRuntimeCall(RTLFn, RealArgs);
10770 }
10771 
10772 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10773                                          const Expr *NumTeams,
10774                                          const Expr *ThreadLimit,
10775                                          SourceLocation Loc) {
10776   if (!CGF.HaveInsertPoint())
10777     return;
10778 
10779   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10780 
10781   llvm::Value *NumTeamsVal =
10782       NumTeams
10783           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10784                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10785           : CGF.Builder.getInt32(0);
10786 
10787   llvm::Value *ThreadLimitVal =
10788       ThreadLimit
10789           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10790                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10791           : CGF.Builder.getInt32(0);
10792 
10793   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
10794   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10795                                      ThreadLimitVal};
10796   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
10797                       PushNumTeamsArgs);
10798 }
10799 
/// Emits the code for a 'target data' region: a call to
/// __tgt_target_data_begin, the region body, and a call to
/// __tgt_target_data_end.
///
/// \param CGF     Function being emitted; nothing is emitted if it has no
///                insertion point.
/// \param D       Directive whose map clauses describe the data environment.
/// \param IfCond  Optional 'if' clause condition. When present, both runtime
///                calls are emitted under it via emitIfClause().
/// \param Device  Optional 'device' clause expression. When absent,
///                OMP_DEVICEID_UNDEF is passed to the runtime.
/// \param CodeGen Generator for the region body.
/// \param Info    Receives the offloading arrays built for the opening call;
///                the closing call reuses them.
///
/// When Info.CaptureDeviceAddrMap is non-empty the body is emitted inside
/// the opening branches (once after the begin call and, if there is an 'if'
/// clause, once more with NoPrivAction in the else branch). Otherwise the
/// body is emitted a single time between the two runtime calls.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MCHandler(D, CGF);
    MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin),
                        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    // The arrays were already built by the opening code; only the call
    // arguments are re-created from Info here.
    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end),
                        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  // Opening of the data environment.
  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  // Closing of the data environment.
  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
10926 
/// Emits the runtime call for a standalone data directive: 'target enter
/// data', 'target exit data' or 'target update'.
///
/// \param CGF    Function being emitted; nothing is emitted if it has no
///               insertion point.
/// \param D      The directive; any other directive kind is rejected by an
///               assert and by llvm_unreachable in the switch below.
/// \param IfCond Optional 'if' clause condition. When present, everything is
///               emitted under it and the else branch is empty.
/// \param Device Optional 'device' clause expression. When absent,
///               OMP_DEVICEID_UNDEF is passed to the runtime.
///
/// The runtime entry point is chosen from the directive kind and from
/// whether a 'nowait' clause is present. With a 'depend' clause the call is
/// emitted through EmitOMPTargetTaskBasedDirective(); otherwise it is
/// emitted inline.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // Shared between the two lambdas below: TargetThenGen fills these in
  // before it hands ThenGen over for emission.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo,
                    &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    llvm::Value *OffloadingArgs[] = {DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray};

    // Select the right runtime function call for each expected standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    OpenMPRTLFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
                        : OMPRTL__tgt_target_data_begin;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
                        : OMPRTL__tgt_target_data_end;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
                        : OMPRTL__tgt_target_data_update;
      break;
    // Every other directive kind is listed explicitly (no default label) and
    // is not expected here.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
  };

  // Builds the offloading arrays from the map clauses, publishes them through
  // InputInfo/MapTypesArray and then emits ThenGen.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    // A 'depend' clause routes the call through the target-task path;
    // otherwise it is emitted inline.
    if (D.hasClausesOfKind<OMPDependClause>())
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    // Nothing is emitted when the 'if' condition is false.
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
11089 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  /// In the x86 vector-variant name each kind is encoded by one letter:
  /// LinearWithVarStride -> 's', Linear -> 'l', Uniform -> 'u',
  /// Vector -> 'v'.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    /// Parameter kind; a parameter is Vector unless stated otherwise.
    ParamKindTy Kind = Vector;
    /// Value printed after the kind letter for Linear and
    /// LinearWithVarStride parameters (a Linear value of 1 is omitted).
    llvm::APSInt StrideOrArg;
    /// Alignment of the parameter; printed as 'a' plus the value in the
    /// x86 name only when non-zero.
    llvm::APSInt Alignment;
  };
} // namespace
11100 
11101 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11102                                 ArrayRef<ParamAttrTy> ParamAttrs) {
11103   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11104   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11105   // of that clause. The VLEN value must be power of 2.
11106   // In other case the notion of the function`s "characteristic data type" (CDT)
11107   // is used to compute the vector length.
11108   // CDT is defined in the following order:
11109   //   a) For non-void function, the CDT is the return type.
11110   //   b) If the function has any non-uniform, non-linear parameters, then the
11111   //   CDT is the type of the first such parameter.
11112   //   c) If the CDT determined by a) or b) above is struct, union, or class
11113   //   type which is pass-by-value (except for the type that maps to the
11114   //   built-in complex data type), the characteristic data type is int.
11115   //   d) If none of the above three cases is applicable, the CDT is int.
11116   // The VLEN is then determined based on the CDT and the size of vector
11117   // register of that ISA for which current vector version is generated. The
11118   // VLEN is computed using the formula below:
11119   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
11120   // where vector register size specified in section 3.2.1 Registers and the
11121   // Stack Frame of original AMD64 ABI document.
11122   QualType RetType = FD->getReturnType();
11123   if (RetType.isNull())
11124     return 0;
11125   ASTContext &C = FD->getASTContext();
11126   QualType CDT;
11127   if (!RetType.isNull() && !RetType->isVoidType()) {
11128     CDT = RetType;
11129   } else {
11130     unsigned Offset = 0;
11131     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11132       if (ParamAttrs[Offset].Kind == Vector)
11133         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11134       ++Offset;
11135     }
11136     if (CDT.isNull()) {
11137       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11138         if (ParamAttrs[I + Offset].Kind == Vector) {
11139           CDT = FD->getParamDecl(I)->getType();
11140           break;
11141         }
11142       }
11143     }
11144   }
11145   if (CDT.isNull())
11146     CDT = C.IntTy;
11147   CDT = CDT->getCanonicalTypeUnqualified();
11148   if (CDT->isRecordType() || CDT->isUnionType())
11149     CDT = C.IntTy;
11150   return C.getTypeSize(CDT);
11151 }
11152 
11153 static void
11154 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11155                            const llvm::APSInt &VLENVal,
11156                            ArrayRef<ParamAttrTy> ParamAttrs,
11157                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
11158   struct ISADataTy {
11159     char ISA;
11160     unsigned VecRegSize;
11161   };
11162   ISADataTy ISAData[] = {
11163       {
11164           'b', 128
11165       }, // SSE
11166       {
11167           'c', 256
11168       }, // AVX
11169       {
11170           'd', 256
11171       }, // AVX2
11172       {
11173           'e', 512
11174       }, // AVX512
11175   };
11176   llvm::SmallVector<char, 2> Masked;
11177   switch (State) {
11178   case OMPDeclareSimdDeclAttr::BS_Undefined:
11179     Masked.push_back('N');
11180     Masked.push_back('M');
11181     break;
11182   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11183     Masked.push_back('N');
11184     break;
11185   case OMPDeclareSimdDeclAttr::BS_Inbranch:
11186     Masked.push_back('M');
11187     break;
11188   }
11189   for (char Mask : Masked) {
11190     for (const ISADataTy &Data : ISAData) {
11191       SmallString<256> Buffer;
11192       llvm::raw_svector_ostream Out(Buffer);
11193       Out << "_ZGV" << Data.ISA << Mask;
11194       if (!VLENVal) {
11195         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11196         assert(NumElts && "Non-zero simdlen/cdtsize expected");
11197         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11198       } else {
11199         Out << VLENVal;
11200       }
11201       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11202         switch (ParamAttr.Kind){
11203         case LinearWithVarStride:
11204           Out << 's' << ParamAttr.StrideOrArg;
11205           break;
11206         case Linear:
11207           Out << 'l';
11208           if (ParamAttr.StrideOrArg != 1)
11209             Out << ParamAttr.StrideOrArg;
11210           break;
11211         case Uniform:
11212           Out << 'u';
11213           break;
11214         case Vector:
11215           Out << 'v';
11216           break;
11217         }
11218         if (!!ParamAttr.Alignment)
11219           Out << 'a' << ParamAttr.Alignment;
11220       }
11221       Out << '_' << Fn->getName();
11222       Fn->addFnAttr(Out.str());
11223     }
11224   }
11225 }
11226 
// These are the functions needed to mangle the names of the vector
// functions generated by the compiler, according to the rules defined in
// the "Vector Function ABI specifications for AArch64", available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11232 
11233 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11234 ///
11235 /// TODO: Need to implement the behavior for reference marked with a
11236 /// var or no linear modifiers (1.b in the section). For this, we
11237 /// need to extend ParamKindTy to support the linear modifiers.
11238 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11239   QT = QT.getCanonicalType();
11240 
11241   if (QT->isVoidType())
11242     return false;
11243 
11244   if (Kind == ParamKindTy::Uniform)
11245     return false;
11246 
11247   if (Kind == ParamKindTy::Linear)
11248     return false;
11249 
11250   // TODO: Handle linear references with modifiers
11251 
11252   if (Kind == ParamKindTy::LinearWithVarStride)
11253     return false;
11254 
11255   return true;
11256 }
11257 
11258 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11259 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11260   QT = QT.getCanonicalType();
11261   unsigned Size = C.getTypeSize(QT);
11262 
11263   // Only scalars and complex within 16 bytes wide set PVB to true.
11264   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11265     return false;
11266 
11267   if (QT->isFloatingType())
11268     return true;
11269 
11270   if (QT->isIntegerType())
11271     return true;
11272 
11273   if (QT->isPointerType())
11274     return true;
11275 
11276   // TODO: Add support for complex types (section 3.1.2, item 2).
11277 
11278   return false;
11279 }
11280 
11281 /// Computes the lane size (LS) of a return type or of an input parameter,
11282 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11283 /// TODO: Add support for references, section 3.2.1, item 1.
11284 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11285   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11286     QualType PTy = QT.getCanonicalType()->getPointeeType();
11287     if (getAArch64PBV(PTy, C))
11288       return C.getTypeSize(PTy);
11289   }
11290   if (getAArch64PBV(QT, C))
11291     return C.getTypeSize(QT);
11292 
11293   return C.getTypeSize(C.getUIntPtrType());
11294 }
11295 
11296 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11297 // signature of the scalar function, as defined in 3.2.2 of the
11298 // AAVFABI.
11299 static std::tuple<unsigned, unsigned, bool>
11300 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11301   QualType RetType = FD->getReturnType().getCanonicalType();
11302 
11303   ASTContext &C = FD->getASTContext();
11304 
11305   bool OutputBecomesInput = false;
11306 
11307   llvm::SmallVector<unsigned, 8> Sizes;
11308   if (!RetType->isVoidType()) {
11309     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11310     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11311       OutputBecomesInput = true;
11312   }
11313   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11314     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11315     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11316   }
11317 
11318   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11319   // The LS of a function parameter / return value can only be a power
11320   // of 2, starting from 8 bits, up to 128.
11321   assert(std::all_of(Sizes.begin(), Sizes.end(),
11322                      [](unsigned Size) {
11323                        return Size == 8 || Size == 16 || Size == 32 ||
11324                               Size == 64 || Size == 128;
11325                      }) &&
11326          "Invalid size");
11327 
11328   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11329                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
11330                          OutputBecomesInput);
11331 }
11332 
11333 /// Mangle the parameter part of the vector function name according to
11334 /// their OpenMP classification. The mangling function is defined in
11335 /// section 3.5 of the AAVFABI.
11336 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11337   SmallString<256> Buffer;
11338   llvm::raw_svector_ostream Out(Buffer);
11339   for (const auto &ParamAttr : ParamAttrs) {
11340     switch (ParamAttr.Kind) {
11341     case LinearWithVarStride:
11342       Out << "ls" << ParamAttr.StrideOrArg;
11343       break;
11344     case Linear:
11345       Out << 'l';
11346       // Don't print the step value if it is not present or if it is
11347       // equal to 1.
11348       if (ParamAttr.StrideOrArg != 1)
11349         Out << ParamAttr.StrideOrArg;
11350       break;
11351     case Uniform:
11352       Out << 'u';
11353       break;
11354     case Vector:
11355       Out << 'v';
11356       break;
11357     }
11358 
11359     if (!!ParamAttr.Alignment)
11360       Out << 'a' << ParamAttr.Alignment;
11361   }
11362 
11363   return std::string(Out.str());
11364 }
11365 
11366 // Function used to add the attribute. The parameter `VLEN` is
11367 // templated to allow the use of "x" when targeting scalable functions
11368 // for SVE.
11369 template <typename T>
11370 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11371                                  char ISA, StringRef ParSeq,
11372                                  StringRef MangledName, bool OutputBecomesInput,
11373                                  llvm::Function *Fn) {
11374   SmallString<256> Buffer;
11375   llvm::raw_svector_ostream Out(Buffer);
11376   Out << Prefix << ISA << LMask << VLEN;
11377   if (OutputBecomesInput)
11378     Out << "v";
11379   Out << ParSeq << "_" << MangledName;
11380   Fn->addFnAttr(Out.str());
11381 }
11382 
11383 // Helper function to generate the Advanced SIMD names depending on
11384 // the value of the NDS when simdlen is not present.
11385 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11386                                       StringRef Prefix, char ISA,
11387                                       StringRef ParSeq, StringRef MangledName,
11388                                       bool OutputBecomesInput,
11389                                       llvm::Function *Fn) {
11390   switch (NDS) {
11391   case 8:
11392     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11393                          OutputBecomesInput, Fn);
11394     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11395                          OutputBecomesInput, Fn);
11396     break;
11397   case 16:
11398     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11399                          OutputBecomesInput, Fn);
11400     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11401                          OutputBecomesInput, Fn);
11402     break;
11403   case 32:
11404     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11405                          OutputBecomesInput, Fn);
11406     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11407                          OutputBecomesInput, Fn);
11408     break;
11409   case 64:
11410   case 128:
11411     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11412                          OutputBecomesInput, Fn);
11413     break;
11414   default:
11415     llvm_unreachable("Scalar type is too wide.");
11416   }
11417 }
11418 
11419 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
11420 static void emitAArch64DeclareSimdFunction(
11421     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
11422     ArrayRef<ParamAttrTy> ParamAttrs,
11423     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
11424     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
11425 
11426   // Get basic data for building the vector signature.
11427   const auto Data = getNDSWDS(FD, ParamAttrs);
11428   const unsigned NDS = std::get<0>(Data);
11429   const unsigned WDS = std::get<1>(Data);
11430   const bool OutputBecomesInput = std::get<2>(Data);
11431 
11432   // Check the values provided via `simdlen` by the user.
11433   // 1. A `simdlen(1)` doesn't produce vector signatures,
11434   if (UserVLEN == 1) {
11435     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11436         DiagnosticsEngine::Warning,
11437         "The clause simdlen(1) has no effect when targeting aarch64.");
11438     CGM.getDiags().Report(SLoc, DiagID);
11439     return;
11440   }
11441 
11442   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
11443   // Advanced SIMD output.
11444   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
11445     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11446         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
11447                                     "power of 2 when targeting Advanced SIMD.");
11448     CGM.getDiags().Report(SLoc, DiagID);
11449     return;
11450   }
11451 
11452   // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
11453   // limits.
11454   if (ISA == 's' && UserVLEN != 0) {
11455     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
11456       unsigned DiagID = CGM.getDiags().getCustomDiagID(
11457           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
11458                                       "lanes in the architectural constraints "
11459                                       "for SVE (min is 128-bit, max is "
11460                                       "2048-bit, by steps of 128-bit)");
11461       CGM.getDiags().Report(SLoc, DiagID) << WDS;
11462       return;
11463     }
11464   }
11465 
11466   // Sort out parameter sequence.
11467   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
11468   StringRef Prefix = "_ZGV";
11469   // Generate simdlen from user input (if any).
11470   if (UserVLEN) {
11471     if (ISA == 's') {
11472       // SVE generates only a masked function.
11473       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11474                            OutputBecomesInput, Fn);
11475     } else {
11476       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11477       // Advanced SIMD generates one or two functions, depending on
11478       // the `[not]inbranch` clause.
11479       switch (State) {
11480       case OMPDeclareSimdDeclAttr::BS_Undefined:
11481         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11482                              OutputBecomesInput, Fn);
11483         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11484                              OutputBecomesInput, Fn);
11485         break;
11486       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11487         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11488                              OutputBecomesInput, Fn);
11489         break;
11490       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11491         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11492                              OutputBecomesInput, Fn);
11493         break;
11494       }
11495     }
11496   } else {
11497     // If no user simdlen is provided, follow the AAVFABI rules for
11498     // generating the vector length.
11499     if (ISA == 's') {
11500       // SVE, section 3.4.1, item 1.
11501       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
11502                            OutputBecomesInput, Fn);
11503     } else {
11504       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11505       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
11506       // two vector names depending on the use of the clause
11507       // `[not]inbranch`.
11508       switch (State) {
11509       case OMPDeclareSimdDeclAttr::BS_Undefined:
11510         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11511                                   OutputBecomesInput, Fn);
11512         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11513                                   OutputBecomesInput, Fn);
11514         break;
11515       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11516         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11517                                   OutputBecomesInput, Fn);
11518         break;
11519       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11520         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11521                                   OutputBecomesInput, Fn);
11522         break;
11523       }
11524     }
11525   }
11526 }
11527 
11528 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11529                                               llvm::Function *Fn) {
11530   ASTContext &C = CGM.getContext();
11531   FD = FD->getMostRecentDecl();
11532   // Map params to their positions in function decl.
11533   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11534   if (isa<CXXMethodDecl>(FD))
11535     ParamPositions.try_emplace(FD, 0);
11536   unsigned ParamPos = ParamPositions.size();
11537   for (const ParmVarDecl *P : FD->parameters()) {
11538     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11539     ++ParamPos;
11540   }
11541   while (FD) {
11542     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11543       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11544       // Mark uniform parameters.
11545       for (const Expr *E : Attr->uniforms()) {
11546         E = E->IgnoreParenImpCasts();
11547         unsigned Pos;
11548         if (isa<CXXThisExpr>(E)) {
11549           Pos = ParamPositions[FD];
11550         } else {
11551           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11552                                 ->getCanonicalDecl();
11553           Pos = ParamPositions[PVD];
11554         }
11555         ParamAttrs[Pos].Kind = Uniform;
11556       }
11557       // Get alignment info.
11558       auto NI = Attr->alignments_begin();
11559       for (const Expr *E : Attr->aligneds()) {
11560         E = E->IgnoreParenImpCasts();
11561         unsigned Pos;
11562         QualType ParmTy;
11563         if (isa<CXXThisExpr>(E)) {
11564           Pos = ParamPositions[FD];
11565           ParmTy = E->getType();
11566         } else {
11567           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11568                                 ->getCanonicalDecl();
11569           Pos = ParamPositions[PVD];
11570           ParmTy = PVD->getType();
11571         }
11572         ParamAttrs[Pos].Alignment =
11573             (*NI)
11574                 ? (*NI)->EvaluateKnownConstInt(C)
11575                 : llvm::APSInt::getUnsigned(
11576                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11577                           .getQuantity());
11578         ++NI;
11579       }
11580       // Mark linear parameters.
11581       auto SI = Attr->steps_begin();
11582       auto MI = Attr->modifiers_begin();
11583       for (const Expr *E : Attr->linears()) {
11584         E = E->IgnoreParenImpCasts();
11585         unsigned Pos;
11586         // Rescaling factor needed to compute the linear parameter
11587         // value in the mangled name.
11588         unsigned PtrRescalingFactor = 1;
11589         if (isa<CXXThisExpr>(E)) {
11590           Pos = ParamPositions[FD];
11591         } else {
11592           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11593                                 ->getCanonicalDecl();
11594           Pos = ParamPositions[PVD];
11595           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11596             PtrRescalingFactor = CGM.getContext()
11597                                      .getTypeSizeInChars(P->getPointeeType())
11598                                      .getQuantity();
11599         }
11600         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11601         ParamAttr.Kind = Linear;
11602         // Assuming a stride of 1, for `linear` without modifiers.
11603         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11604         if (*SI) {
11605           Expr::EvalResult Result;
11606           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11607             if (const auto *DRE =
11608                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11609               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
11610                 ParamAttr.Kind = LinearWithVarStride;
11611                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
11612                     ParamPositions[StridePVD->getCanonicalDecl()]);
11613               }
11614             }
11615           } else {
11616             ParamAttr.StrideOrArg = Result.Val.getInt();
11617           }
11618         }
11619         // If we are using a linear clause on a pointer, we need to
11620         // rescale the value of linear_step with the byte size of the
11621         // pointee type.
11622         if (Linear == ParamAttr.Kind)
11623           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11624         ++SI;
11625         ++MI;
11626       }
11627       llvm::APSInt VLENVal;
11628       SourceLocation ExprLoc;
11629       const Expr *VLENExpr = Attr->getSimdlen();
11630       if (VLENExpr) {
11631         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11632         ExprLoc = VLENExpr->getExprLoc();
11633       }
11634       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11635       if (CGM.getTriple().isX86()) {
11636         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11637       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11638         unsigned VLEN = VLENVal.getExtValue();
11639         StringRef MangledName = Fn->getName();
11640         if (CGM.getTarget().hasFeature("sve"))
11641           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11642                                          MangledName, 's', 128, Fn, ExprLoc);
11643         if (CGM.getTarget().hasFeature("neon"))
11644           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11645                                          MangledName, 'n', 128, Fn, ExprLoc);
11646       }
11647     }
11648     FD = FD->getPreviousDecl();
11649   }
11650 }
11651 
11652 namespace {
11653 /// Cleanup action for doacross support.
11654 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11655 public:
11656   static const int DoacrossFinArgs = 2;
11657 
11658 private:
11659   llvm::FunctionCallee RTLFn;
11660   llvm::Value *Args[DoacrossFinArgs];
11661 
11662 public:
11663   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11664                     ArrayRef<llvm::Value *> CallArgs)
11665       : RTLFn(RTLFn) {
11666     assert(CallArgs.size() == DoacrossFinArgs);
11667     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11668   }
11669   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11670     if (!CGF.HaveInsertPoint())
11671       return;
11672     CGF.EmitRuntimeCall(RTLFn, Args);
11673   }
11674 };
11675 } // namespace
11676 
11677 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
11678                                        const OMPLoopDirective &D,
11679                                        ArrayRef<Expr *> NumIterations) {
11680   if (!CGF.HaveInsertPoint())
11681     return;
11682 
11683   ASTContext &C = CGM.getContext();
11684   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
11685   RecordDecl *RD;
11686   if (KmpDimTy.isNull()) {
11687     // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
11688     //  kmp_int64 lo; // lower
11689     //  kmp_int64 up; // upper
11690     //  kmp_int64 st; // stride
11691     // };
11692     RD = C.buildImplicitRecord("kmp_dim");
11693     RD->startDefinition();
11694     addFieldToRecordDecl(C, RD, Int64Ty);
11695     addFieldToRecordDecl(C, RD, Int64Ty);
11696     addFieldToRecordDecl(C, RD, Int64Ty);
11697     RD->completeDefinition();
11698     KmpDimTy = C.getRecordType(RD);
11699   } else {
11700     RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
11701   }
11702   llvm::APInt Size(/*numBits=*/32, NumIterations.size());
11703   QualType ArrayTy =
11704       C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);
11705 
11706   Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
11707   CGF.EmitNullInitialization(DimsAddr, ArrayTy);
11708   enum { LowerFD = 0, UpperFD, StrideFD };
11709   // Fill dims with data.
11710   for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
11711     LValue DimsLVal = CGF.MakeAddrLValue(
11712         CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
11713     // dims.upper = num_iterations;
11714     LValue UpperLVal = CGF.EmitLValueForField(
11715         DimsLVal, *std::next(RD->field_begin(), UpperFD));
11716     llvm::Value *NumIterVal = CGF.EmitScalarConversion(
11717         CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
11718         Int64Ty, NumIterations[I]->getExprLoc());
11719     CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
11720     // dims.stride = 1;
11721     LValue StrideLVal = CGF.EmitLValueForField(
11722         DimsLVal, *std::next(RD->field_begin(), StrideFD));
11723     CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
11724                           StrideLVal);
11725   }
11726 
11727   // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
11728   // kmp_int32 num_dims, struct kmp_dim * dims);
11729   llvm::Value *Args[] = {
11730       emitUpdateLocation(CGF, D.getBeginLoc()),
11731       getThreadID(CGF, D.getBeginLoc()),
11732       llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
11733       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11734           CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
11735           CGM.VoidPtrTy)};
11736 
11737   llvm::FunctionCallee RTLFn =
11738       createRuntimeFunction(OMPRTL__kmpc_doacross_init);
11739   CGF.EmitRuntimeCall(RTLFn, Args);
11740   llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
11741       emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
11742   llvm::FunctionCallee FiniRTLFn =
11743       createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
11744   CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11745                                              llvm::makeArrayRef(FiniArgs));
11746 }
11747 
11748 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11749                                           const OMPDependClause *C) {
11750   QualType Int64Ty =
11751       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11752   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11753   QualType ArrayTy = CGM.getContext().getConstantArrayType(
11754       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
11755   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11756   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11757     const Expr *CounterVal = C->getLoopData(I);
11758     assert(CounterVal);
11759     llvm::Value *CntVal = CGF.EmitScalarConversion(
11760         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11761         CounterVal->getExprLoc());
11762     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11763                           /*Volatile=*/false, Int64Ty);
11764   }
11765   llvm::Value *Args[] = {
11766       emitUpdateLocation(CGF, C->getBeginLoc()),
11767       getThreadID(CGF, C->getBeginLoc()),
11768       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
11769   llvm::FunctionCallee RTLFn;
11770   if (C->getDependencyKind() == OMPC_DEPEND_source) {
11771     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
11772   } else {
11773     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
11774     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
11775   }
11776   CGF.EmitRuntimeCall(RTLFn, Args);
11777 }
11778 
11779 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11780                                llvm::FunctionCallee Callee,
11781                                ArrayRef<llvm::Value *> Args) const {
11782   assert(Loc.isValid() && "Outlined function call location must be valid.");
11783   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11784 
11785   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11786     if (Fn->doesNotThrow()) {
11787       CGF.EmitNounwindRuntimeCall(Fn, Args);
11788       return;
11789     }
11790   }
11791   CGF.EmitRuntimeCall(Callee, Args);
11792 }
11793 
11794 void CGOpenMPRuntime::emitOutlinedFunctionCall(
11795     CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
11796     ArrayRef<llvm::Value *> Args) const {
11797   emitCall(CGF, Loc, OutlinedFn, Args);
11798 }
11799 
11800 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11801   if (const auto *FD = dyn_cast<FunctionDecl>(D))
11802     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11803       HasEmittedDeclareTargetRegion = true;
11804 }
11805 
11806 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
11807                                              const VarDecl *NativeParam,
11808                                              const VarDecl *TargetParam) const {
11809   return CGF.GetAddrOfLocalVar(NativeParam);
11810 }
11811 
11812 namespace {
11813 /// Cleanup action for allocate support.
11814 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
11815 public:
11816   static const int CleanupArgs = 3;
11817 
11818 private:
11819   llvm::FunctionCallee RTLFn;
11820   llvm::Value *Args[CleanupArgs];
11821 
11822 public:
11823   OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
11824                        ArrayRef<llvm::Value *> CallArgs)
11825       : RTLFn(RTLFn) {
11826     assert(CallArgs.size() == CleanupArgs &&
11827            "Size of arguments does not match.");
11828     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11829   }
11830   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11831     if (!CGF.HaveInsertPoint())
11832       return;
11833     CGF.EmitRuntimeCall(RTLFn, Args);
11834   }
11835 };
11836 } // namespace
11837 
11838 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
11839                                                    const VarDecl *VD) {
11840   if (!VD)
11841     return Address::invalid();
11842   const VarDecl *CVD = VD->getCanonicalDecl();
11843   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
11844     return Address::invalid();
11845   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
11846   // Use the default allocation.
11847   if ((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
11848        AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
11849       !AA->getAllocator())
11850     return Address::invalid();
11851   llvm::Value *Size;
11852   CharUnits Align = CGM.getContext().getDeclAlign(CVD);
11853   if (CVD->getType()->isVariablyModifiedType()) {
11854     Size = CGF.getTypeSize(CVD->getType());
11855     // Align the size: ((size + align - 1) / align) * align
11856     Size = CGF.Builder.CreateNUWAdd(
11857         Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
11858     Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
11859     Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
11860   } else {
11861     CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
11862     Size = CGM.getSize(Sz.alignTo(Align));
11863   }
11864   llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
11865   assert(AA->getAllocator() &&
11866          "Expected allocator expression for non-default allocator.");
11867   llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
11868   // According to the standard, the original allocator type is a enum (integer).
11869   // Convert to pointer type, if required.
11870   if (Allocator->getType()->isIntegerTy())
11871     Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
11872   else if (Allocator->getType()->isPointerTy())
11873     Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
11874                                                                 CGM.VoidPtrTy);
11875   llvm::Value *Args[] = {ThreadID, Size, Allocator};
11876 
11877   llvm::Value *Addr =
11878       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args,
11879                           getName({CVD->getName(), ".void.addr"}));
11880   llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
11881                                                               Allocator};
11882   llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);
11883 
11884   CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11885                                                 llvm::makeArrayRef(FiniArgs));
11886   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11887       Addr,
11888       CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
11889       getName({CVD->getName(), ".addr"}));
11890   return Address(Addr, Align);
11891 }
11892 
11893 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11894     CodeGenModule &CGM, const OMPLoopDirective &S)
11895     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11896   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11897   if (!NeedToPush)
11898     return;
11899   NontemporalDeclsSet &DS =
11900       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11901   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11902     for (const Stmt *Ref : C->private_refs()) {
11903       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11904       const ValueDecl *VD;
11905       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11906         VD = DRE->getDecl();
11907       } else {
11908         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11909         assert((ME->isImplicitCXXThis() ||
11910                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11911                "Expected member of current class.");
11912         VD = ME->getMemberDecl();
11913       }
11914       DS.insert(VD);
11915     }
11916   }
11917 }
11918 
11919 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11920   if (!NeedToPush)
11921     return;
11922   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11923 }
11924 
11925 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11926   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11927 
11928   return llvm::any_of(
11929       CGM.getOpenMPRuntime().NontemporalDeclsStack,
11930       [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
11931 }
11932 
11933 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11934     const OMPExecutableDirective &S,
11935     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11936     const {
11937   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11938   // Vars in target/task regions must be excluded completely.
11939   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11940       isOpenMPTaskingDirective(S.getDirectiveKind())) {
11941     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11942     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11943     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11944     for (const CapturedStmt::Capture &Cap : CS->captures()) {
11945       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11946         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11947     }
11948   }
11949   // Exclude vars in private clauses.
11950   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11951     for (const Expr *Ref : C->varlists()) {
11952       if (!Ref->getType()->isScalarType())
11953         continue;
11954       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11955       if (!DRE)
11956         continue;
11957       NeedToCheckForLPCs.insert(DRE->getDecl());
11958     }
11959   }
11960   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11961     for (const Expr *Ref : C->varlists()) {
11962       if (!Ref->getType()->isScalarType())
11963         continue;
11964       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11965       if (!DRE)
11966         continue;
11967       NeedToCheckForLPCs.insert(DRE->getDecl());
11968     }
11969   }
11970   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11971     for (const Expr *Ref : C->varlists()) {
11972       if (!Ref->getType()->isScalarType())
11973         continue;
11974       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11975       if (!DRE)
11976         continue;
11977       NeedToCheckForLPCs.insert(DRE->getDecl());
11978     }
11979   }
11980   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11981     for (const Expr *Ref : C->varlists()) {
11982       if (!Ref->getType()->isScalarType())
11983         continue;
11984       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11985       if (!DRE)
11986         continue;
11987       NeedToCheckForLPCs.insert(DRE->getDecl());
11988     }
11989   }
11990   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11991     for (const Expr *Ref : C->varlists()) {
11992       if (!Ref->getType()->isScalarType())
11993         continue;
11994       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11995       if (!DRE)
11996         continue;
11997       NeedToCheckForLPCs.insert(DRE->getDecl());
11998     }
11999   }
12000   for (const Decl *VD : NeedToCheckForLPCs) {
12001     for (const LastprivateConditionalData &Data :
12002          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12003       if (Data.DeclToUniqueName.count(VD) > 0) {
12004         if (!Data.Disabled)
12005           NeedToAddForLPCsAsDisabled.insert(VD);
12006         break;
12007       }
12008     }
12009   }
12010 }
12011 
12012 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12013     CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
12014     : CGM(CGF.CGM),
12015       Action((CGM.getLangOpts().OpenMP >= 50 &&
12016               llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
12017                            [](const OMPLastprivateClause *C) {
12018                              return C->getKind() ==
12019                                     OMPC_LASTPRIVATE_conditional;
12020                            }))
12021                  ? ActionToDo::PushAsLastprivateConditional
12022                  : ActionToDo::DoNotPush) {
12023   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12024   if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
12025     return;
12026   assert(Action == ActionToDo::PushAsLastprivateConditional &&
12027          "Expected a push action.");
12028   LastprivateConditionalData &Data =
12029       CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12030   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12031     if (C->getKind() != OMPC_LASTPRIVATE_conditional)
12032       continue;
12033 
12034     for (const Expr *Ref : C->varlists()) {
12035       Data.DeclToUniqueName.insert(std::make_pair(
12036           cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
12037           SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
12038     }
12039   }
12040   Data.IVLVal = IVLVal;
12041   Data.Fn = CGF.CurFn;
12042 }
12043 
12044 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12045     CodeGenFunction &CGF, const OMPExecutableDirective &S)
12046     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12047   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12048   if (CGM.getLangOpts().OpenMP < 50)
12049     return;
12050   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12051   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12052   if (!NeedToAddForLPCsAsDisabled.empty()) {
12053     Action = ActionToDo::DisableLastprivateConditional;
12054     LastprivateConditionalData &Data =
12055         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12056     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12057       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
12058     Data.Fn = CGF.CurFn;
12059     Data.Disabled = true;
12060   }
12061 }
12062 
12063 CGOpenMPRuntime::LastprivateConditionalRAII
12064 CGOpenMPRuntime::LastprivateConditionalRAII::disable(
12065     CodeGenFunction &CGF, const OMPExecutableDirective &S) {
12066   return LastprivateConditionalRAII(CGF, S);
12067 }
12068 
12069 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12070   if (CGM.getLangOpts().OpenMP < 50)
12071     return;
12072   if (Action == ActionToDo::DisableLastprivateConditional) {
12073     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12074            "Expected list of disabled private vars.");
12075     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12076   }
12077   if (Action == ActionToDo::PushAsLastprivateConditional) {
12078     assert(
12079         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12080         "Expected list of lastprivate conditional vars.");
12081     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12082   }
12083 }
12084 
12085 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
12086                                                         const VarDecl *VD) {
12087   ASTContext &C = CGM.getContext();
12088   auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
12089   if (I == LastprivateConditionalToTypes.end())
12090     I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
12091   QualType NewType;
12092   const FieldDecl *VDField;
12093   const FieldDecl *FiredField;
12094   LValue BaseLVal;
12095   auto VI = I->getSecond().find(VD);
12096   if (VI == I->getSecond().end()) {
12097     RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
12098     RD->startDefinition();
12099     VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
12100     FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
12101     RD->completeDefinition();
12102     NewType = C.getRecordType(RD);
12103     Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
12104     BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
12105     I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
12106   } else {
12107     NewType = std::get<0>(VI->getSecond());
12108     VDField = std::get<1>(VI->getSecond());
12109     FiredField = std::get<2>(VI->getSecond());
12110     BaseLVal = std::get<3>(VI->getSecond());
12111   }
12112   LValue FiredLVal =
12113       CGF.EmitLValueForField(BaseLVal, FiredField);
12114   CGF.EmitStoreOfScalar(
12115       llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
12116       FiredLVal);
12117   return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
12118 }
12119 
12120 namespace {
12121 /// Checks if the lastprivate conditional variable is referenced in LHS.
12122 class LastprivateConditionalRefChecker final
12123     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12124   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12125   const Expr *FoundE = nullptr;
12126   const Decl *FoundD = nullptr;
12127   StringRef UniqueDeclName;
12128   LValue IVLVal;
12129   llvm::Function *FoundFn = nullptr;
12130   SourceLocation Loc;
12131 
12132 public:
12133   bool VisitDeclRefExpr(const DeclRefExpr *E) {
12134     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12135          llvm::reverse(LPM)) {
12136       auto It = D.DeclToUniqueName.find(E->getDecl());
12137       if (It == D.DeclToUniqueName.end())
12138         continue;
12139       if (D.Disabled)
12140         return false;
12141       FoundE = E;
12142       FoundD = E->getDecl()->getCanonicalDecl();
12143       UniqueDeclName = It->second;
12144       IVLVal = D.IVLVal;
12145       FoundFn = D.Fn;
12146       break;
12147     }
12148     return FoundE == E;
12149   }
12150   bool VisitMemberExpr(const MemberExpr *E) {
12151     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
12152       return false;
12153     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12154          llvm::reverse(LPM)) {
12155       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12156       if (It == D.DeclToUniqueName.end())
12157         continue;
12158       if (D.Disabled)
12159         return false;
12160       FoundE = E;
12161       FoundD = E->getMemberDecl()->getCanonicalDecl();
12162       UniqueDeclName = It->second;
12163       IVLVal = D.IVLVal;
12164       FoundFn = D.Fn;
12165       break;
12166     }
12167     return FoundE == E;
12168   }
12169   bool VisitStmt(const Stmt *S) {
12170     for (const Stmt *Child : S->children()) {
12171       if (!Child)
12172         continue;
12173       if (const auto *E = dyn_cast<Expr>(Child))
12174         if (!E->isGLValue())
12175           continue;
12176       if (Visit(Child))
12177         return true;
12178     }
12179     return false;
12180   }
12181   explicit LastprivateConditionalRefChecker(
12182       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12183       : LPM(LPM) {}
12184   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12185   getFoundData() const {
12186     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12187   }
12188 };
12189 } // namespace
12190 
12191 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
12192                                                        LValue IVLVal,
12193                                                        StringRef UniqueDeclName,
12194                                                        LValue LVal,
12195                                                        SourceLocation Loc) {
12196   // Last updated loop counter for the lastprivate conditional var.
12197   // int<xx> last_iv = 0;
12198   llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
12199   llvm::Constant *LastIV =
12200       getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
12201   cast<llvm::GlobalVariable>(LastIV)->setAlignment(
12202       IVLVal.getAlignment().getAsAlign());
12203   LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());
12204 
12205   // Last value of the lastprivate conditional.
12206   // decltype(priv_a) last_a;
12207   llvm::Constant *Last = getOrCreateInternalVariable(
12208       CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
12209   cast<llvm::GlobalVariable>(Last)->setAlignment(
12210       LVal.getAlignment().getAsAlign());
12211   LValue LastLVal =
12212       CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());
12213 
12214   // Global loop counter. Required to handle inner parallel-for regions.
12215   // iv
12216   llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
12217 
12218   // #pragma omp critical(a)
12219   // if (last_iv <= iv) {
12220   //   last_iv = iv;
12221   //   last_a = priv_a;
12222   // }
12223   auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
12224                     Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
12225     Action.Enter(CGF);
12226     llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
12227     // (last_iv <= iv) ? Check if the variable is updated and store new
12228     // value in global var.
12229     llvm::Value *CmpRes;
12230     if (IVLVal.getType()->isSignedIntegerType()) {
12231       CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
12232     } else {
12233       assert(IVLVal.getType()->isUnsignedIntegerType() &&
12234              "Loop iteration variable must be integer.");
12235       CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
12236     }
12237     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
12238     llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
12239     CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
12240     // {
12241     CGF.EmitBlock(ThenBB);
12242 
12243     //   last_iv = iv;
12244     CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
12245 
12246     //   last_a = priv_a;
12247     switch (CGF.getEvaluationKind(LVal.getType())) {
12248     case TEK_Scalar: {
12249       llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
12250       CGF.EmitStoreOfScalar(PrivVal, LastLVal);
12251       break;
12252     }
12253     case TEK_Complex: {
12254       CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
12255       CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
12256       break;
12257     }
12258     case TEK_Aggregate:
12259       llvm_unreachable(
12260           "Aggregates are not supported in lastprivate conditional.");
12261     }
12262     // }
12263     CGF.EmitBranch(ExitBB);
12264     // There is no need to emit line number for unconditional branch.
12265     (void)ApplyDebugLocation::CreateEmpty(CGF);
12266     CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
12267   };
12268 
12269   if (CGM.getLangOpts().OpenMPSimd) {
12270     // Do not emit as a critical region as no parallel region could be emitted.
12271     RegionCodeGenTy ThenRCG(CodeGen);
12272     ThenRCG(CGF);
12273   } else {
12274     emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
12275   }
12276 }
12277 
12278 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12279                                                          const Expr *LHS) {
12280   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12281     return;
12282   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12283   if (!Checker.Visit(LHS))
12284     return;
12285   const Expr *FoundE;
12286   const Decl *FoundD;
12287   StringRef UniqueDeclName;
12288   LValue IVLVal;
12289   llvm::Function *FoundFn;
12290   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12291       Checker.getFoundData();
12292   if (FoundFn != CGF.CurFn) {
12293     // Special codegen for inner parallel regions.
12294     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12295     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12296     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12297            "Lastprivate conditional is not found in outer region.");
12298     QualType StructTy = std::get<0>(It->getSecond());
12299     const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
12300     LValue PrivLVal = CGF.EmitLValue(FoundE);
12301     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12302         PrivLVal.getAddress(CGF),
12303         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
12304     LValue BaseLVal =
12305         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12306     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12307     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12308                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12309                         FiredLVal, llvm::AtomicOrdering::Unordered,
12310                         /*IsVolatile=*/true, /*isInit=*/false);
12311     return;
12312   }
12313 
12314   // Private address of the lastprivate conditional in the current context.
12315   // priv_a
12316   LValue LVal = CGF.EmitLValue(FoundE);
12317   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12318                                    FoundE->getExprLoc());
12319 }
12320 
12321 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12322     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12323     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12324   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12325     return;
12326   auto Range = llvm::reverse(LastprivateConditionalStack);
12327   auto It = llvm::find_if(
12328       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12329   if (It == Range.end() || It->Fn != CGF.CurFn)
12330     return;
12331   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12332   assert(LPCI != LastprivateConditionalToTypes.end() &&
12333          "Lastprivates must be registered already.");
12334   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12335   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12336   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12337   for (const auto &Pair : It->DeclToUniqueName) {
12338     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12339     if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
12340       continue;
12341     auto I = LPCI->getSecond().find(Pair.first);
12342     assert(I != LPCI->getSecond().end() &&
12343            "Lastprivate must be rehistered already.");
12344     // bool Cmp = priv_a.Fired != 0;
12345     LValue BaseLVal = std::get<3>(I->getSecond());
12346     LValue FiredLVal =
12347         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12348     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12349     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12350     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12351     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12352     // if (Cmp) {
12353     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12354     CGF.EmitBlock(ThenBB);
12355     Address Addr = CGF.GetAddrOfLocalVar(VD);
12356     LValue LVal;
12357     if (VD->getType()->isReferenceType())
12358       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12359                                            AlignmentSource::Decl);
12360     else
12361       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12362                                 AlignmentSource::Decl);
12363     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12364                                      D.getBeginLoc());
12365     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12366     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12367     // }
12368   }
12369 }
12370 
12371 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12372     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12373     SourceLocation Loc) {
12374   if (CGF.getLangOpts().OpenMP < 50)
12375     return;
12376   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12377   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12378          "Unknown lastprivate conditional variable.");
12379   StringRef UniqueName = It->second;
12380   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12381   // The variable was not updated in the region - exit.
12382   if (!GV)
12383     return;
12384   LValue LPLVal = CGF.MakeAddrLValue(
12385       GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
12386   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12387   CGF.EmitStoreOfScalar(Res, PrivLVal);
12388 }
12389 
12390 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
12391     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12392     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
12393   llvm_unreachable("Not supported in SIMD-only mode");
12394 }
12395 
12396 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
12397     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12398     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
12399   llvm_unreachable("Not supported in SIMD-only mode");
12400 }
12401 
12402 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
12403     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12404     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
12405     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
12406     bool Tied, unsigned &NumberOfParts) {
12407   llvm_unreachable("Not supported in SIMD-only mode");
12408 }
12409 
12410 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
12411                                            SourceLocation Loc,
12412                                            llvm::Function *OutlinedFn,
12413                                            ArrayRef<llvm::Value *> CapturedVars,
12414                                            const Expr *IfCond) {
12415   llvm_unreachable("Not supported in SIMD-only mode");
12416 }
12417 
12418 void CGOpenMPSIMDRuntime::emitCriticalRegion(
12419     CodeGenFunction &CGF, StringRef CriticalName,
12420     const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
12421     const Expr *Hint) {
12422   llvm_unreachable("Not supported in SIMD-only mode");
12423 }
12424 
12425 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
12426                                            const RegionCodeGenTy &MasterOpGen,
12427                                            SourceLocation Loc) {
12428   llvm_unreachable("Not supported in SIMD-only mode");
12429 }
12430 
12431 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
12432                                             SourceLocation Loc) {
12433   llvm_unreachable("Not supported in SIMD-only mode");
12434 }
12435 
12436 void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
12437     CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
12438     SourceLocation Loc) {
12439   llvm_unreachable("Not supported in SIMD-only mode");
12440 }
12441 
12442 void CGOpenMPSIMDRuntime::emitSingleRegion(
12443     CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
12444     SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
12445     ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
12446     ArrayRef<const Expr *> AssignmentOps) {
12447   llvm_unreachable("Not supported in SIMD-only mode");
12448 }
12449 
12450 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
12451                                             const RegionCodeGenTy &OrderedOpGen,
12452                                             SourceLocation Loc,
12453                                             bool IsThreads) {
12454   llvm_unreachable("Not supported in SIMD-only mode");
12455 }
12456 
12457 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
12458                                           SourceLocation Loc,
12459                                           OpenMPDirectiveKind Kind,
12460                                           bool EmitChecks,
12461                                           bool ForceSimpleCall) {
12462   llvm_unreachable("Not supported in SIMD-only mode");
12463 }
12464 
12465 void CGOpenMPSIMDRuntime::emitForDispatchInit(
12466     CodeGenFunction &CGF, SourceLocation Loc,
12467     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
12468     bool Ordered, const DispatchRTInput &DispatchValues) {
12469   llvm_unreachable("Not supported in SIMD-only mode");
12470 }
12471 
12472 void CGOpenMPSIMDRuntime::emitForStaticInit(
12473     CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
12474     const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
12475   llvm_unreachable("Not supported in SIMD-only mode");
12476 }
12477 
12478 void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
12479     CodeGenFunction &CGF, SourceLocation Loc,
12480     OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
12481   llvm_unreachable("Not supported in SIMD-only mode");
12482 }
12483 
12484 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
12485                                                      SourceLocation Loc,
12486                                                      unsigned IVSize,
12487                                                      bool IVSigned) {
12488   llvm_unreachable("Not supported in SIMD-only mode");
12489 }
12490 
12491 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
12492                                               SourceLocation Loc,
12493                                               OpenMPDirectiveKind DKind) {
12494   llvm_unreachable("Not supported in SIMD-only mode");
12495 }
12496 
12497 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
12498                                               SourceLocation Loc,
12499                                               unsigned IVSize, bool IVSigned,
12500                                               Address IL, Address LB,
12501                                               Address UB, Address ST) {
12502   llvm_unreachable("Not supported in SIMD-only mode");
12503 }
12504 
12505 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
12506                                                llvm::Value *NumThreads,
12507                                                SourceLocation Loc) {
12508   llvm_unreachable("Not supported in SIMD-only mode");
12509 }
12510 
12511 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
12512                                              ProcBindKind ProcBind,
12513                                              SourceLocation Loc) {
12514   llvm_unreachable("Not supported in SIMD-only mode");
12515 }
12516 
12517 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
12518                                                     const VarDecl *VD,
12519                                                     Address VDAddr,
12520                                                     SourceLocation Loc) {
12521   llvm_unreachable("Not supported in SIMD-only mode");
12522 }
12523 
12524 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
12525     const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
12526     CodeGenFunction *CGF) {
12527   llvm_unreachable("Not supported in SIMD-only mode");
12528 }
12529 
12530 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
12531     CodeGenFunction &CGF, QualType VarType, StringRef Name) {
12532   llvm_unreachable("Not supported in SIMD-only mode");
12533 }
12534 
12535 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
12536                                     ArrayRef<const Expr *> Vars,
12537                                     SourceLocation Loc,
12538                                     llvm::AtomicOrdering AO) {
12539   llvm_unreachable("Not supported in SIMD-only mode");
12540 }
12541 
12542 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
12543                                        const OMPExecutableDirective &D,
12544                                        llvm::Function *TaskFunction,
12545                                        QualType SharedsTy, Address Shareds,
12546                                        const Expr *IfCond,
12547                                        const OMPTaskDataTy &Data) {
12548   llvm_unreachable("Not supported in SIMD-only mode");
12549 }
12550 
12551 void CGOpenMPSIMDRuntime::emitTaskLoopCall(
12552     CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
12553     llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
12554     const Expr *IfCond, const OMPTaskDataTy &Data) {
12555   llvm_unreachable("Not supported in SIMD-only mode");
12556 }
12557 
12558 void CGOpenMPSIMDRuntime::emitReduction(
12559     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
12560     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
12561     ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
12562   assert(Options.SimpleReduction && "Only simple reduction is expected.");
12563   CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
12564                                  ReductionOps, Options);
12565 }
12566 
12567 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
12568     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
12569     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
12570   llvm_unreachable("Not supported in SIMD-only mode");
12571 }
12572 
12573 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
12574                                                 SourceLocation Loc,
12575                                                 bool IsWorksharingReduction) {
12576   llvm_unreachable("Not supported in SIMD-only mode");
12577 }
12578 
12579 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
12580                                                   SourceLocation Loc,
12581                                                   ReductionCodeGen &RCG,
12582                                                   unsigned N) {
12583   llvm_unreachable("Not supported in SIMD-only mode");
12584 }
12585 
12586 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
12587                                                   SourceLocation Loc,
12588                                                   llvm::Value *ReductionsPtr,
12589                                                   LValue SharedLVal) {
12590   llvm_unreachable("Not supported in SIMD-only mode");
12591 }
12592 
12593 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
12594                                            SourceLocation Loc) {
12595   llvm_unreachable("Not supported in SIMD-only mode");
12596 }
12597 
12598 void CGOpenMPSIMDRuntime::emitCancellationPointCall(
12599     CodeGenFunction &CGF, SourceLocation Loc,
12600     OpenMPDirectiveKind CancelRegion) {
12601   llvm_unreachable("Not supported in SIMD-only mode");
12602 }
12603 
12604 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
12605                                          SourceLocation Loc, const Expr *IfCond,
12606                                          OpenMPDirectiveKind CancelRegion) {
12607   llvm_unreachable("Not supported in SIMD-only mode");
12608 }
12609 
12610 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
12611     const OMPExecutableDirective &D, StringRef ParentName,
12612     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
12613     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
12614   llvm_unreachable("Not supported in SIMD-only mode");
12615 }
12616 
12617 void CGOpenMPSIMDRuntime::emitTargetCall(
12618     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12619     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
12620     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
12621     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
12622                                      const OMPLoopDirective &D)>
12623         SizeEmitter) {
12624   llvm_unreachable("Not supported in SIMD-only mode");
12625 }
12626 
12627 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
12628   llvm_unreachable("Not supported in SIMD-only mode");
12629 }
12630 
12631 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
12632   llvm_unreachable("Not supported in SIMD-only mode");
12633 }
12634 
12635 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
12636   return false;
12637 }
12638 
12639 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
12640                                         const OMPExecutableDirective &D,
12641                                         SourceLocation Loc,
12642                                         llvm::Function *OutlinedFn,
12643                                         ArrayRef<llvm::Value *> CapturedVars) {
12644   llvm_unreachable("Not supported in SIMD-only mode");
12645 }
12646 
12647 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
12648                                              const Expr *NumTeams,
12649                                              const Expr *ThreadLimit,
12650                                              SourceLocation Loc) {
12651   llvm_unreachable("Not supported in SIMD-only mode");
12652 }
12653 
12654 void CGOpenMPSIMDRuntime::emitTargetDataCalls(
12655     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12656     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
12657   llvm_unreachable("Not supported in SIMD-only mode");
12658 }
12659 
12660 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
12661     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12662     const Expr *Device) {
12663   llvm_unreachable("Not supported in SIMD-only mode");
12664 }
12665 
12666 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
12667                                            const OMPLoopDirective &D,
12668                                            ArrayRef<Expr *> NumIterations) {
12669   llvm_unreachable("Not supported in SIMD-only mode");
12670 }
12671 
12672 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12673                                               const OMPDependClause *C) {
12674   llvm_unreachable("Not supported in SIMD-only mode");
12675 }
12676 
12677 const VarDecl *
12678 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
12679                                         const VarDecl *NativeParam) const {
12680   llvm_unreachable("Not supported in SIMD-only mode");
12681 }
12682 
12683 Address
12684 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
12685                                          const VarDecl *NativeParam,
12686                                          const VarDecl *TargetParam) const {
12687   llvm_unreachable("Not supported in SIMD-only mode");
12688 }
12689