1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/Attr.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/OpenMPClause.h"
21 #include "clang/AST/StmtOpenMP.h"
22 #include "clang/AST/StmtVisitor.h"
23 #include "clang/Basic/BitmaskEnum.h"
24 #include "clang/Basic/FileManager.h"
25 #include "clang/Basic/OpenMPKinds.h"
26 #include "clang/Basic/SourceManager.h"
27 #include "clang/CodeGen/ConstantInitBuilder.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/SetOperations.h"
30 #include "llvm/ADT/StringExtras.h"
31 #include "llvm/Bitcode/BitcodeReader.h"
32 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
33 #include "llvm/IR/Constants.h"
34 #include "llvm/IR/DerivedTypes.h"
35 #include "llvm/IR/GlobalValue.h"
36 #include "llvm/IR/Value.h"
37 #include "llvm/Support/AtomicOrdering.h"
38 #include "llvm/Support/Format.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include <cassert>
41 #include <numeric>
42 
43 using namespace clang;
44 using namespace CodeGen;
45 using namespace llvm::omp;
46 
47 namespace {
48 /// Base class for handling code generation inside OpenMP regions.
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Constructor for regions built around a captured statement.
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Constructor for regions with no captured statement of their own
  /// (e.g. inlined regions, which delegate to an enclosing region).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit a task-switching point for untied tasks. No-op here; overridden
  /// for task-outlined regions and forwarded by inlined regions.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// Kind of this codegen region.
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// OpenMP directive this region was created for.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// True if a 'cancel' construct may exit this region.
  bool hasCancel() const { return HasCancel; }

  /// RTTI support: every CGOpenMPRegionInfo subclass uses the CR_OpenMP kind;
  /// subclasses refine classof() by additionally checking getRegionKind().
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  /// Callback that emits the code for the body of the region.
  RegionCodeGenTy CodeGen;
  /// Directive kind that produced this region.
  OpenMPDirectiveKind Kind;
  /// Whether 'cancel' is permitted inside the region.
  bool HasCancel;
};
109 
110 /// API for captured statement code generation in OpenMP constructs.
/// API for captured statement code generation in OpenMP constructs
/// that are outlined into a separate helper function (e.g. 'parallel').
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    // Outlined regions always receive the global thread id as a parameter.
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name used for the emitted outlined helper function.
  StringRef HelperName;
};
142 
143 /// API for captured statement code generation in OpenMP constructs.
/// API for captured statement code generation in OpenMP 'task' constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action that emits the part-id based dispatch machinery for
  /// untied tasks: a switch on the stored part id that resumes execution at
  /// the correct task-switching point. For tied tasks it is a no-op.
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    /// Parameter holding a pointer to the current task part id.
    const VarDecl *PartIDVar;
    /// Extra codegen run at each task-switching point (before suspending).
    const RegionCodeGenTy UntiedCodeGen;
    /// Dispatch switch created in Enter(); null for tied tasks.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        // Unknown part ids fall through to the default block, which simply
        // returns from the task entry.
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        // Part id 0 resumes at the beginning of the task body.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit one task-switching point: store the next part id, run the
    /// user-provided codegen, suspend (branch to return), and register the
    /// resume block as a new case of the dispatch switch.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        // The next case index doubles as the part id to resume at.
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of task parts emitted so far. NOTE(review): dereferences
    /// UntiedSwitch, so this is only valid for untied tasks after Enter()
    /// has run — confirm callers respect that.
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
231 
232 /// API for inlined captured statement code generation in OpenMP
233 /// constructs.
/// API for inlined captured statement code generation in OpenMP constructs.
/// Inlined regions have no captures of their own; every query is forwarded
/// to the enclosing outlined region (if any).
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  /// Captured-statement info that was active before this region was entered;
  /// restored by InlinedOpenMPRegionRAII on exit.
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to a region info, or null if the enclosing captured
  /// statement is not an OpenMP region.
  CGOpenMPRegionInfo *OuterRegionInfo;
};
314 
/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of
/// the captured fields. The name of the target region has to be unique in a
/// given application so it is provided by the client, because only the client
/// has the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Unique name of the target region, supplied by the client.
  StringRef HelperName;
};
343 
/// Placeholder RegionCodeGenTy callback for regions that must never emit a
/// body (used by CGOpenMPInnerExprInfo, which captures only expressions).
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
347 /// API for generation of expressions captured in a innermost OpenMP
348 /// region.
349 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
350 public:
351   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
352       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
353                                   OMPD_unknown,
354                                   /*HasCancel=*/false),
355         PrivScope(CGF) {
356     // Make sure the globals captured in the provided statement are local by
357     // using the privatization logic. We assume the same variable is not
358     // captured more than once.
359     for (const auto &C : CS.captures()) {
360       if (!C.capturesVariable() && !C.capturesVariableByCopy())
361         continue;
362 
363       const VarDecl *VD = C.getCapturedVar();
364       if (VD->isLocalVarDeclOrParm())
365         continue;
366 
367       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
368                       /*RefersToEnclosingVariableOrCapture=*/false,
369                       VD->getType().getNonReferenceType(), VK_LValue,
370                       C.getLocation());
371       PrivScope.addPrivate(
372           VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
373     }
374     (void)PrivScope.Privatize();
375   }
376 
377   /// Lookup the captured field decl for a variable.
378   const FieldDecl *lookup(const VarDecl *VD) const override {
379     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
380       return FD;
381     return nullptr;
382   }
383 
384   /// Emit the captured statement body.
385   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
386     llvm_unreachable("No body for expressions");
387   }
388 
389   /// Get a variable or parameter for storing global thread id
390   /// inside OpenMP construct.
391   const VarDecl *getThreadIDVariable() const override {
392     llvm_unreachable("No thread id for expressions");
393   }
394 
395   /// Get the name of the capture helper.
396   StringRef getHelperName() const override {
397     llvm_unreachable("No helper name for expressions");
398   }
399 
400   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
401 
402 private:
403   /// Private scope to capture global variables.
404   CodeGenFunction::OMPPrivateScope PrivScope;
405 };
406 
/// RAII for emitting code of OpenMP constructs. Installs a
/// CGOpenMPInlinedRegionInfo as CGF.CapturedStmtInfo on entry and restores
/// the previous captured-statement, lambda-capture, and block state on exit.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  /// Saved lambda capture map of CGF, swapped back in the destructor.
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  /// Saved 'this' capture field of CGF.
  FieldDecl *LambdaThisCaptureField = nullptr;
  /// Saved block codegen info of CGF.
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel)
      : CGF(CGF) {
    // Start emission for the construct. Ownership of the new region info is
    // held via CGF.CapturedStmtInfo and reclaimed in the destructor.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    // Stash and clear lambda/block capture state so the inlined region does
    // not accidentally resolve captures through the enclosing lambda/block.
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    LambdaThisCaptureField = CGF.LambdaThisCaptureField;
    CGF.LambdaThisCaptureField = nullptr;
    BlockInfo = CGF.BlockInfo;
    CGF.BlockInfo = nullptr;
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    CGF.LambdaThisCaptureField = LambdaThisCaptureField;
    CGF.BlockInfo = BlockInfo;
  }
};
443 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumeric elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// The numeric values form part of the ABI with libomp and must not change.
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  /// Deliberately shares 0x40 with OMP_IDENT_BARRIER_IMPL.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
472 
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
/// These are passed to __tgt_register_requires, so the values are ABI.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

/// Reserved device ids understood by the offloading runtime.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
498 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
/// Field indices used when GEP-ing into an emitted ident_t value; the order
/// must match the runtime's struct layout above.
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
539 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h). The numeric values are part of the libomp
/// ABI and must stay in sync with the runtime.
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
571 
572 enum OpenMPRTLFunction {
573   /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
574   /// kmpc_micro microtask, ...);
575   OMPRTL__kmpc_fork_call,
576   /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
577   /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
578   OMPRTL__kmpc_threadprivate_cached,
579   /// Call to void __kmpc_threadprivate_register( ident_t *,
580   /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
581   OMPRTL__kmpc_threadprivate_register,
582   // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc);
583   OMPRTL__kmpc_global_thread_num,
584   // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
585   // kmp_critical_name *crit);
586   OMPRTL__kmpc_critical,
587   // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
588   // global_tid, kmp_critical_name *crit, uintptr_t hint);
589   OMPRTL__kmpc_critical_with_hint,
590   // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
591   // kmp_critical_name *crit);
592   OMPRTL__kmpc_end_critical,
593   // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
594   // global_tid);
595   OMPRTL__kmpc_cancel_barrier,
596   // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
597   OMPRTL__kmpc_barrier,
598   // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
599   OMPRTL__kmpc_for_static_fini,
600   // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
601   // global_tid);
602   OMPRTL__kmpc_serialized_parallel,
603   // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
604   // global_tid);
605   OMPRTL__kmpc_end_serialized_parallel,
606   // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
607   // kmp_int32 num_threads);
608   OMPRTL__kmpc_push_num_threads,
609   // Call to void __kmpc_flush(ident_t *loc);
610   OMPRTL__kmpc_flush,
611   // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
612   OMPRTL__kmpc_master,
613   // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
614   OMPRTL__kmpc_end_master,
615   // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
616   // int end_part);
617   OMPRTL__kmpc_omp_taskyield,
618   // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
619   OMPRTL__kmpc_single,
620   // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
621   OMPRTL__kmpc_end_single,
622   // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
623   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
624   // kmp_routine_entry_t *task_entry);
625   OMPRTL__kmpc_omp_task_alloc,
626   // Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *,
627   // kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
628   // size_t sizeof_shareds, kmp_routine_entry_t *task_entry,
629   // kmp_int64 device_id);
630   OMPRTL__kmpc_omp_target_task_alloc,
631   // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
632   // new_task);
633   OMPRTL__kmpc_omp_task,
634   // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
635   // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
636   // kmp_int32 didit);
637   OMPRTL__kmpc_copyprivate,
638   // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
639   // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
640   // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
641   OMPRTL__kmpc_reduce,
642   // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
643   // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
644   // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
645   // *lck);
646   OMPRTL__kmpc_reduce_nowait,
647   // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
648   // kmp_critical_name *lck);
649   OMPRTL__kmpc_end_reduce,
650   // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
651   // kmp_critical_name *lck);
652   OMPRTL__kmpc_end_reduce_nowait,
653   // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
654   // kmp_task_t * new_task);
655   OMPRTL__kmpc_omp_task_begin_if0,
656   // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
657   // kmp_task_t * new_task);
658   OMPRTL__kmpc_omp_task_complete_if0,
659   // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
660   OMPRTL__kmpc_ordered,
661   // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
662   OMPRTL__kmpc_end_ordered,
663   // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
664   // global_tid);
665   OMPRTL__kmpc_omp_taskwait,
666   // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
667   OMPRTL__kmpc_taskgroup,
668   // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
669   OMPRTL__kmpc_end_taskgroup,
670   // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
671   // int proc_bind);
672   OMPRTL__kmpc_push_proc_bind,
673   // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
674   // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
675   // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
676   OMPRTL__kmpc_omp_task_with_deps,
677   // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
678   // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
679   // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
680   OMPRTL__kmpc_omp_wait_deps,
681   // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
682   // global_tid, kmp_int32 cncl_kind);
683   OMPRTL__kmpc_cancellationpoint,
684   // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
685   // kmp_int32 cncl_kind);
686   OMPRTL__kmpc_cancel,
687   // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
688   // kmp_int32 num_teams, kmp_int32 thread_limit);
689   OMPRTL__kmpc_push_num_teams,
690   // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
691   // microtask, ...);
692   OMPRTL__kmpc_fork_teams,
693   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
694   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
695   // sched, kmp_uint64 grainsize, void *task_dup);
696   OMPRTL__kmpc_taskloop,
697   // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
698   // num_dims, struct kmp_dim *dims);
699   OMPRTL__kmpc_doacross_init,
700   // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
701   OMPRTL__kmpc_doacross_fini,
702   // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
703   // *vec);
704   OMPRTL__kmpc_doacross_post,
705   // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
706   // *vec);
707   OMPRTL__kmpc_doacross_wait,
708   // Call to void *__kmpc_taskred_init(int gtid, int num_data, void *data);
709   OMPRTL__kmpc_taskred_init,
710   // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
711   // *d);
712   OMPRTL__kmpc_task_reduction_get_th_data,
713   // Call to void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
714   // is_ws, int num, void *data);
715   OMPRTL__kmpc_taskred_modifier_init,
716   // Call to void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid,
717   // int is_ws);
718   OMPRTL__kmpc_task_reduction_modifier_fini,
719   // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
720   OMPRTL__kmpc_alloc,
721   // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
722   OMPRTL__kmpc_free,
723   // Call to omp_allocator_handle_t __kmpc_init_allocator(int gtid,
724   // omp_memspace_handle_t, int ntraits, omp_alloctrait_t traits[]);
725   OMPRTL__kmpc_init_allocator,
726   // Call to void __kmpc_destroy_allocator(int gtid, omp_allocator_handle_t al);
727   OMPRTL__kmpc_destroy_allocator,
728 
729   //
730   // Offloading related calls
731   //
732   // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
733   // size);
734   OMPRTL__kmpc_push_target_tripcount,
735   // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
736   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
737   // *arg_types);
738   OMPRTL__tgt_target,
739   // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
740   // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
741   // *arg_types);
742   OMPRTL__tgt_target_nowait,
743   // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
744   // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
745   // *arg_types, int32_t num_teams, int32_t thread_limit);
746   OMPRTL__tgt_target_teams,
747   // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
748   // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
749   // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
750   OMPRTL__tgt_target_teams_nowait,
751   // Call to void __tgt_register_requires(int64_t flags);
752   OMPRTL__tgt_register_requires,
753   // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
754   // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
755   OMPRTL__tgt_target_data_begin,
756   // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
757   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
758   // *arg_types);
759   OMPRTL__tgt_target_data_begin_nowait,
760   // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
761   // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
762   OMPRTL__tgt_target_data_end,
763   // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
764   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
765   // *arg_types);
766   OMPRTL__tgt_target_data_end_nowait,
767   // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
768   // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
769   OMPRTL__tgt_target_data_update,
770   // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
771   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
772   // *arg_types);
773   OMPRTL__tgt_target_data_update_nowait,
774   // Call to int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
775   OMPRTL__tgt_mapper_num_components,
776   // Call to void __tgt_push_mapper_component(void *rt_mapper_handle, void
777   // *base, void *begin, int64_t size, int64_t type);
778   OMPRTL__tgt_push_mapper_component,
779   // Call to kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
780   // int gtid, kmp_task_t *task);
781   OMPRTL__kmpc_task_allow_completion_event,
782 };
783 
784 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
785 /// region.
786 class CleanupTy final : public EHScopeStack::Cleanup {
787   PrePostActionTy *Action;
788 
789 public:
790   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
791   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
792     if (!CGF.HaveInsertPoint())
793       return;
794     Action->Exit(CGF);
795   }
796 };
797 
798 } // anonymous namespace
799 
800 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
801   CodeGenFunction::RunCleanupsScope Scope(CGF);
802   if (PrePostAction) {
803     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
804     Callback(CodeGen, CGF, *PrePostAction);
805   } else {
806     PrePostActionTy Action;
807     Callback(CodeGen, CGF, Action);
808   }
809 }
810 
811 /// Check if the combiner is a call to UDR combiner and if it is so return the
812 /// UDR decl used for reduction.
813 static const OMPDeclareReductionDecl *
814 getReductionInit(const Expr *ReductionOp) {
815   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
816     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
817       if (const auto *DRE =
818               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
819         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
820           return DRD;
821   return nullptr;
822 }
823 
/// Initialize a private reduction copy using a declare-reduction declaration.
/// If \p DRD has an explicit initializer, the UDR initializer function is
/// invoked with the private and original addresses substituted for its
/// omp_priv/omp_orig parameters; otherwise the private copy is filled from a
/// zero-initialized global constant of type \p Ty.
/// \param DRD Declare-reduction declaration driving the initialization.
/// \param InitOp Initializer call expression from the reduction clause.
/// \param Private Address of the private copy being initialized.
/// \param Original Address of the original (shared) variable.
/// \param Ty Type of the reduction item.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // Fetch (and lazily emit) the UDR's {combiner, initializer} functions.
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    // The two call arguments reference the priv/orig variables; dig down to
    // their DeclRefExprs so the variables can be remapped to our addresses.
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Remap the LHS variable to the private address and the RHS variable to
    // the original address for the duration of the call emission.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // Bind the opaque callee to the emitted initializer function and emit the
    // call, discarding its value.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No explicit initializer: materialize a private-linkage global holding
    // the zero value of Ty and copy it into the private storage.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    // Load the zero value in whatever form Ty's evaluation kind requires.
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress(CGF));
      break;
    }
    // Wrap the loaded value in an opaque expression and store it into the
    // private address.
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
875 
/// Emit element-by-element initialization of an array reduction item.
/// \param DestAddr Address of the (private) destination array.
/// \param Type Type of the array.
/// \param EmitDeclareReductionInit True to initialize each element via the
///        declare-reduction initializer instead of \p Init.
/// \param Init Initial expression for each element (reduction op or the
///        private variable's initializer, depending on the flag above).
/// \param DRD Declare-reduction declaration, or null; when present the
///        matching source element of \p SrcAddr is walked in lock-step.
/// \param SrcAddr Address of the original array (only used with \p DRD).
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source/destination element across iterations.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Scope so per-element cleanups run before advancing to the next element.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the IR value name says "dest" but this advances the
    // *source* pointer; cosmetic only, does not affect codegen.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
964 
965 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
966   return CGF.EmitOMPSharedLValue(E);
967 }
968 
969 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
970                                             const Expr *E) {
971   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
972     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
973   return LValue();
974 }
975 
/// Emit initialization of a private array reduction item: selects between
/// the declare-reduction initializer and the private variable's own
/// initializer, then lowers it element-by-element via EmitOMPAggregateInit.
void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  // Use the UDR initializer when one is declared, or when the private copy
  // has no initializer of its own.
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedLVal.getAddress(CGF));
}
992 
993 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
994                                    ArrayRef<const Expr *> Origs,
995                                    ArrayRef<const Expr *> Privates,
996                                    ArrayRef<const Expr *> ReductionOps) {
997   ClausesData.reserve(Shareds.size());
998   SharedAddresses.reserve(Shareds.size());
999   Sizes.reserve(Shareds.size());
1000   BaseDecls.reserve(Shareds.size());
1001   const auto *IOrig = Origs.begin();
1002   const auto *IPriv = Privates.begin();
1003   const auto *IRed = ReductionOps.begin();
1004   for (const Expr *Ref : Shareds) {
1005     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
1006     std::advance(IOrig, 1);
1007     std::advance(IPriv, 1);
1008     std::advance(IRed, 1);
1009   }
1010 }
1011 
1012 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
1013   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
1014          "Number of generated lvalues must be exactly N.");
1015   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
1016   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
1017   SharedAddresses.emplace_back(First, Second);
1018   if (ClausesData[N].Shared == ClausesData[N].Ref) {
1019     OrigAddresses.emplace_back(First, Second);
1020   } else {
1021     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
1022     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
1023     OrigAddresses.emplace_back(First, Second);
1024   }
1025 }
1026 
/// Compute and record the size of reduction item N in Sizes.
/// For fixed-size types only the size-in-chars is recorded; for
/// variably-modified types the dynamic element count is derived either from
/// the array-section bounds or from the total size divided by the element
/// size, and then bound to the VLA size expression.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-size item: no dynamic element count needed.
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count = (upper bound - lower bound) + 1, computed from the
    // section's two bound addresses.
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole object: element count = total size in chars / element size.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the computed count to the VLA size expression so the variably
  // modified private type can be emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
1063 
/// Re-emit the variably-modified type of reduction item N using an
/// externally-provided element count \p Size (e.g. one loaded back from task
/// data). No-op for fixed-size types, where \p Size must be null.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // Bind the provided count to the VLA size expression and emit the type.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
1082 
/// Emit initialization of the private copy for reduction item N.
/// Dispatches, in priority order: array types -> element-wise aggregate
/// init; declare-reduction items -> UDR initializer; otherwise the caller's
/// \p DefaultInit, falling back to the private variable's own non-trivial
/// initializer.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Retype both addresses to the memory representation of their respective
  // types before emitting any stores.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Arrays are initialized element by element.
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar with a declare-reduction initializer (or no own initializer).
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // DefaultInit declined; emit the private variable's own initializer.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
1113 
1114 bool ReductionCodeGen::needCleanups(unsigned N) {
1115   const auto *PrivateVD =
1116       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1117   QualType PrivateType = PrivateVD->getType();
1118   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1119   return DTorKind != QualType::DK_none;
1120 }
1121 
1122 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
1123                                     Address PrivateAddr) {
1124   const auto *PrivateVD =
1125       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1126   QualType PrivateType = PrivateVD->getType();
1127   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1128   if (needCleanups(N)) {
1129     PrivateAddr = CGF.Builder.CreateElementBitCast(
1130         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1131     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
1132   }
1133 }
1134 
/// Repeatedly load through pointer/reference levels of \p BaseLV until its
/// type either matches \p ElTy or is no longer a pointer/reference, then
/// return an lvalue for the result retyped to ElTy's memory representation.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // One load per indirection level; pointers and references need different
    // load helpers.
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  // Retype the final address to ElTy's memory type, preserving the lvalue's
  // base info and TBAA metadata.
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
1154 
/// Rebuild the indirection chain of \p BaseTy around \p Addr: for each
/// pointer/reference level between BaseTy and ElTy a temporary is created
/// and chained by stores, with the (casted) Addr stored at the innermost
/// level. Returns the outermost temporary, or Addr itself (cast to
/// \p BaseLVType with \p BaseLVAlignment) when no indirection is needed.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // Allocate one temporary per level and link it into the previous one.
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp; // First (outermost) temporary is the one returned.
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    // Store the address into the innermost temporary and hand back the
    // outermost one so loads retrace the original indirection chain.
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
1182 
1183 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
1184   const VarDecl *OrigVD = nullptr;
1185   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
1186     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
1187     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
1188       Base = TempOASE->getBase()->IgnoreParenImpCasts();
1189     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1190       Base = TempASE->getBase()->IgnoreParenImpCasts();
1191     DE = cast<DeclRefExpr>(Base);
1192     OrigVD = cast<VarDecl>(DE->getDecl());
1193   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
1194     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
1195     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1196       Base = TempASE->getBase()->IgnoreParenImpCasts();
1197     DE = cast<DeclRefExpr>(Base);
1198     OrigVD = cast<VarDecl>(DE->getDecl());
1199   }
1200   return OrigVD;
1201 }
1202 
/// Adjust the private address of reduction item N when its clause reference
/// is an array section/subscript: compute the offset of the section start
/// from the base variable and apply the same offset to the private pointer,
/// rebuilding the base's indirection chain via castToBase. For plain
/// references the private address is returned unchanged. Records the base
/// VarDecl in BaseDecls in both cases.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    // Address of the base variable, dereferenced down to the element type.
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    // Offset (in elements) of the base from the shared section start.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
    // Apply the same offset to the private copy's pointer.
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress(CGF).getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    // Wrap the adjusted pointer back into the base's indirection structure.
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1228 
1229 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1230   const OMPDeclareReductionDecl *DRD =
1231       getReductionInit(ClausesData[N].ReductionOp);
1232   return DRD && DRD->getInitializer();
1233 }
1234 
1235 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1236   return CGF.EmitLoadOfPointerLValue(
1237       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1238       getThreadIDVariable()->getType()->castAs<PointerType>());
1239 }
1240 
/// Emit the captured statement body of an OpenMP region under a terminate
/// scope, enforcing the structured-block single-entry/single-exit rule.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  // A terminate scope makes any exception escaping the block call terminate
  // instead of unwinding out of the region.
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1253 
1254 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1255     CodeGenFunction &CGF) {
1256   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1257                             getThreadIDVariable()->getType(),
1258                             AlignmentSource::Decl);
1259 }
1260 
1261 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1262                                        QualType FieldTy) {
1263   auto *Field = FieldDecl::Create(
1264       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1265       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1266       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1267   Field->setAccess(AS_public);
1268   DC->addDecl(Field);
1269   return Field;
1270 }
1271 
/// Construct the OpenMP runtime helper: builds the implicit 'ident_t' record
/// (four i32 fields plus a void* psource — the field order below defines the
/// struct layout, do not reorder), the kmp_critical_name array type, and
/// loads any offloading metadata.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OffloadEntriesInfoManager(CGM) {
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("ident_t");
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  RD->startDefinition();
  // reserved_1
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // flags
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_2
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_3
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // psource
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  RD->completeDefinition();
  IdentQTy = C.getRecordType(RD);
  IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  loadOffloadInfoMetadata();
}
1297 
1298 void CGOpenMPRuntime::clear() {
1299   InternalVars.clear();
1300   // Clean non-target variable declarations possibly used only in debug info.
1301   for (const auto &Data : EmittedNonTargetVariables) {
1302     if (!Data.getValue().pointsToAliveValue())
1303       continue;
1304     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1305     if (!GV)
1306       continue;
1307     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1308       continue;
1309     GV->eraseFromParent();
1310   }
1311 }
1312 
1313 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1314   SmallString<128> Buffer;
1315   llvm::raw_svector_ostream OS(Buffer);
1316   StringRef Sep = FirstSeparator;
1317   for (StringRef Part : Parts) {
1318     OS << Sep << Part;
1319     Sep = Separator;
1320   }
1321   return std::string(OS.str());
1322 }
1323 
/// Emit the outlined function for a declare-reduction combiner or
/// initializer: void .omp_combiner./.omp_initializer.(Ty *omp_out, Ty
/// *omp_in), with \p In and \p Out remapped to the dereferenced parameters.
/// \param CombinerInitializer The combiner expression, or (for initializers)
///        the init call when the UDR uses call-style init, else null.
/// \param IsCombiner Selects the function name and the omp_priv
///        default-initialization behavior.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // Allow inlining of the emitted helper when optimizing.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // For initializers without call-style init, run Out's own non-trivial
  // initializer first (direct-init form of declare reduction).
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1380 
1381 void CGOpenMPRuntime::emitUserDefinedReduction(
1382     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1383   if (UDRMap.count(D) > 0)
1384     return;
1385   llvm::Function *Combiner = emitCombinerOrInitializer(
1386       CGM, D->getType(), D->getCombiner(),
1387       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1388       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1389       /*IsCombiner=*/true);
1390   llvm::Function *Initializer = nullptr;
1391   if (const Expr *Init = D->getInitializer()) {
1392     Initializer = emitCombinerOrInitializer(
1393         CGM, D->getType(),
1394         D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1395                                                                      : nullptr,
1396         cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1397         cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1398         /*IsCombiner=*/false);
1399   }
1400   UDRMap.try_emplace(D, Combiner, Initializer);
1401   if (CGF) {
1402     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1403     Decls.second.push_back(D);
1404   }
1405 }
1406 
1407 std::pair<llvm::Function *, llvm::Function *>
1408 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1409   auto I = UDRMap.find(D);
1410   if (I != UDRMap.end())
1411     return I->second;
1412   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1413   return UDRMap.lookup(D);
1414 }
1415 
1416 namespace {
1417 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1418 // Builder if one is present.
1419 struct PushAndPopStackRAII {
1420   PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
1421                       bool HasCancel)
1422       : OMPBuilder(OMPBuilder) {
1423     if (!OMPBuilder)
1424       return;
1425 
1426     // The following callback is the crucial part of clangs cleanup process.
1427     //
1428     // NOTE:
1429     // Once the OpenMPIRBuilder is used to create parallel regions (and
1430     // similar), the cancellation destination (Dest below) is determined via
1431     // IP. That means if we have variables to finalize we split the block at IP,
1432     // use the new block (=BB) as destination to build a JumpDest (via
1433     // getJumpDestInCurrentScope(BB)) which then is fed to
1434     // EmitBranchThroughCleanup. Furthermore, there will not be the need
1435     // to push & pop an FinalizationInfo object.
1436     // The FiniCB will still be needed but at the point where the
1437     // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
1438     auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
1439       assert(IP.getBlock()->end() == IP.getPoint() &&
1440              "Clang CG should cause non-terminated block!");
1441       CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1442       CGF.Builder.restoreIP(IP);
1443       CodeGenFunction::JumpDest Dest =
1444           CGF.getOMPCancelDestination(OMPD_parallel);
1445       CGF.EmitBranchThroughCleanup(Dest);
1446     };
1447 
1448     // TODO: Remove this once we emit parallel regions through the
1449     //       OpenMPIRBuilder as it can do this setup internally.
1450     llvm::OpenMPIRBuilder::FinalizationInfo FI(
1451         {FiniCB, OMPD_parallel, HasCancel});
1452     OMPBuilder->pushFinalizationCB(std::move(FI));
1453   }
1454   ~PushAndPopStackRAII() {
1455     if (OMPBuilder)
1456       OMPBuilder->popFinalizationCB();
1457   }
1458   llvm::OpenMPIRBuilder *OMPBuilder;
1459 };
1460 } // namespace
1461 
1462 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1463     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1464     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1465     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1466   assert(ThreadIDVar->getType()->isPointerType() &&
1467          "thread id variable must be of type kmp_int32 *");
1468   CodeGenFunction CGF(CGM, true);
1469   bool HasCancel = false;
1470   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1471     HasCancel = OPD->hasCancel();
1472   else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1473     HasCancel = OPD->hasCancel();
1474   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1475     HasCancel = OPSD->hasCancel();
1476   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1477     HasCancel = OPFD->hasCancel();
1478   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1479     HasCancel = OPFD->hasCancel();
1480   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1481     HasCancel = OPFD->hasCancel();
1482   else if (const auto *OPFD =
1483                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1484     HasCancel = OPFD->hasCancel();
1485   else if (const auto *OPFD =
1486                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1487     HasCancel = OPFD->hasCancel();
1488 
1489   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1490   //       parallel region to make cancellation barriers work properly.
1491   llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder();
1492   PushAndPopStackRAII PSR(OMPBuilder, CGF, HasCancel);
1493   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1494                                     HasCancel, OutlinedHelperName);
1495   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1496   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1497 }
1498 
1499 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1500     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1501     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1502   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1503   return emitParallelOrTeamsOutlinedFunction(
1504       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1505 }
1506 
1507 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1508     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1509     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1510   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1511   return emitParallelOrTeamsOutlinedFunction(
1512       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1513 }
1514 
// Emits the outlined function for a 'task'/'taskloop' region described by D.
// ThreadIDVar carries the kmp_int32 thread id (by value, unlike parallel
// regions); PartIDVar holds the part id for untied tasks; TaskTVar points to
// the kmp_task_t object.  For untied tasks, NumberOfParts is set to the number
// of task parts produced.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // For untied tasks: re-enqueue the task by calling __kmpc_omp_task with the
  // kmp_task_t* loaded from TaskTVar, so the task can resume at the next part.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
  };
  // Wire the untied-task action into the region code generation; for tied
  // tasks the action degenerates to a no-op.
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  // Pick the captured statement matching the directive family.
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Only these directive kinds may carry a cancellable region.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // The part count is only meaningful (and only queried) for untied tasks.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1559 
1560 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1561                              const RecordDecl *RD, const CGRecordLayout &RL,
1562                              ArrayRef<llvm::Constant *> Data) {
1563   llvm::StructType *StructTy = RL.getLLVMType();
1564   unsigned PrevIdx = 0;
1565   ConstantInitBuilder CIBuilder(CGM);
1566   auto DI = Data.begin();
1567   for (const FieldDecl *FD : RD->fields()) {
1568     unsigned Idx = RL.getLLVMFieldNo(FD);
1569     // Fill the alignment.
1570     for (unsigned I = PrevIdx; I < Idx; ++I)
1571       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1572     PrevIdx = Idx + 1;
1573     Fields.add(*DI);
1574     ++DI;
1575   }
1576 }
1577 
1578 template <class... As>
1579 static llvm::GlobalVariable *
1580 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1581                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1582                    As &&... Args) {
1583   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1584   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1585   ConstantInitBuilder CIBuilder(CGM);
1586   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1587   buildStructValue(Fields, CGM, RD, RL, Data);
1588   return Fields.finishAndCreateGlobal(
1589       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1590       std::forward<As>(Args)...);
1591 }
1592 
1593 template <typename T>
1594 static void
1595 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1596                                          ArrayRef<llvm::Constant *> Data,
1597                                          T &Parent) {
1598   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1599   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1600   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1601   buildStructValue(Fields, CGM, RD, RL, Data);
1602   Fields.finishAndAddTo(Parent);
1603 }
1604 
// Returns the default ident_t object for the given flags, creating and
// caching it on first use.  The cache (OpenMPDefaultLocMap) is keyed by the
// requested flags combined with the target-specific reserved_2 flags.
Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  FlagsTy FlagsKey(Flags, Reserved2Flags);
  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
  if (!Entry) {
    if (!DefaultOpenMPPSource) {
      // Initialize default location for psource field of ident_t structure of
      // all ident_t objects. Format is ";file;function;line;column;;".
      // Taken from
      // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
      DefaultOpenMPPSource =
          CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
      DefaultOpenMPPSource =
          llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
    }

    // ident_t field values: reserved slots are zeroed, the two flag words and
    // the default psource string are filled in.
    llvm::Constant *Data[] = {
        llvm::ConstantInt::getNullValue(CGM.Int32Ty),
        llvm::ConstantInt::get(CGM.Int32Ty, Flags),
        llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
        llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
    llvm::GlobalValue *DefaultOpenMPLocation =
        createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
                           llvm::GlobalValue::PrivateLinkage);
    // The global's address is not significant, so identical location objects
    // may be merged by the linker/optimizer.
    DefaultOpenMPLocation->setUnnamedAddr(
        llvm::GlobalValue::UnnamedAddr::Global);

    OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
  }
  return Address(Entry, Align);
}
1637 
1638 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1639                                              bool AtCurrentPoint) {
1640   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1641   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1642 
1643   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1644   if (AtCurrentPoint) {
1645     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1646         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1647   } else {
1648     Elem.second.ServiceInsertPt =
1649         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1650     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1651   }
1652 }
1653 
1654 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1655   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1656   if (Elem.second.ServiceInsertPt) {
1657     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1658     Elem.second.ServiceInsertPt = nullptr;
1659     Ptr->eraseFromParent();
1660   }
1661 }
1662 
// Emits (or reuses) an ident_t location object describing Loc for runtime
// calls and returns a pointer to it.  When no debug info is requested (or Loc
// is invalid) a shared per-flags default location is returned instead of a
// per-function one.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  // Reuse the per-function ident_t alloca if one was created earlier.
  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  Address LocValue = Address::invalid();
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, Align);

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // GetOpenMPThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    // Copy the default location into the alloca at the service insertion
    // point so the initialization dominates all uses within the function.
    if (!Elem.second.ServiceInsertPt)
      setLocThreadIdInsertPt(CGF);
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGF.getTypeSize(IdentQTy));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
  LValue PSource =
      CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));

  // Build and cache the location string for this source location; the cache
  // (OpenMPDebugLocMap) is keyed by the raw encoding of Loc.
  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      OS2 << FD->getQualifiedNameAsString();
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}
1723 
// Returns the OpenMP thread id for the current function.  Prefers, in order:
// a value already cached for CGF.CurFn, a load of the outlined region's
// thread-id parameter (when safe), and finally a call to
// __kmpc_global_thread_num emitted at the service insertion point.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Only load from the parameter when the load is known to be safe: no
      // landing pads / C++ exceptions in play, or we are emitting in (or the
      // lvalue lives in) the entry block or the current block — otherwise the
      // pointer might not dominate this use.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the runtime call at the service insertion point (entry block) so the
  // cached value dominates all later uses.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1780 
1781 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1782   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1783   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1784     clearLocThreadIdInsertPt(CGF);
1785     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1786   }
1787   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1788     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1789       UDRMap.erase(D);
1790     FunctionUDRMap.erase(CGF.CurFn);
1791   }
1792   auto I = FunctionUDMMap.find(CGF.CurFn);
1793   if (I != FunctionUDMMap.end()) {
1794     for(const auto *D : I->second)
1795       UDMMap.erase(D);
1796     FunctionUDMMap.erase(I);
1797   }
1798   LastprivateConditionalToTypes.erase(CGF.CurFn);
1799 }
1800 
// Returns ident_t* — the type of the location argument passed to the OpenMP
// runtime entry points built in createRuntimeFunction().
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return IdentTy->getPointerTo();
}
1804 
// Returns a pointer to the kmpc_micro outlined-function type, lazily building
// and caching the underlying variadic function type on first use.
llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}
1814 
1815 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1816   llvm::FunctionCallee RTLFn = nullptr;
1817   switch (static_cast<OpenMPRTLFunction>(Function)) {
1818   case OMPRTL__kmpc_fork_call: {
1819     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1820     // microtask, ...);
1821     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1822                                 getKmpc_MicroPointerTy()};
1823     auto *FnTy =
1824         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1825     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1826     if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
1827       if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
1828         llvm::LLVMContext &Ctx = F->getContext();
1829         llvm::MDBuilder MDB(Ctx);
1830         // Annotate the callback behavior of the __kmpc_fork_call:
1831         //  - The callback callee is argument number 2 (microtask).
1832         //  - The first two arguments of the callback callee are unknown (-1).
1833         //  - All variadic arguments to the __kmpc_fork_call are passed to the
1834         //    callback callee.
1835         F->addMetadata(
1836             llvm::LLVMContext::MD_callback,
1837             *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
1838                                         2, {-1, -1},
1839                                         /* VarArgsArePassed */ true)}));
1840       }
1841     }
1842     break;
1843   }
1844   case OMPRTL__kmpc_global_thread_num: {
1845     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1846     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1847     auto *FnTy =
1848         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1849     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1850     break;
1851   }
1852   case OMPRTL__kmpc_threadprivate_cached: {
1853     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1854     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1855     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1856                                 CGM.VoidPtrTy, CGM.SizeTy,
1857                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1858     auto *FnTy =
1859         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1860     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1861     break;
1862   }
1863   case OMPRTL__kmpc_critical: {
1864     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1865     // kmp_critical_name *crit);
1866     llvm::Type *TypeParams[] = {
1867         getIdentTyPointerTy(), CGM.Int32Ty,
1868         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1869     auto *FnTy =
1870         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1871     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1872     break;
1873   }
1874   case OMPRTL__kmpc_critical_with_hint: {
1875     // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1876     // kmp_critical_name *crit, uintptr_t hint);
1877     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1878                                 llvm::PointerType::getUnqual(KmpCriticalNameTy),
1879                                 CGM.IntPtrTy};
1880     auto *FnTy =
1881         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1882     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1883     break;
1884   }
1885   case OMPRTL__kmpc_threadprivate_register: {
1886     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1887     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1888     // typedef void *(*kmpc_ctor)(void *);
1889     auto *KmpcCtorTy =
1890         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1891                                 /*isVarArg*/ false)->getPointerTo();
1892     // typedef void *(*kmpc_cctor)(void *, void *);
1893     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1894     auto *KmpcCopyCtorTy =
1895         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1896                                 /*isVarArg*/ false)
1897             ->getPointerTo();
1898     // typedef void (*kmpc_dtor)(void *);
1899     auto *KmpcDtorTy =
1900         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1901             ->getPointerTo();
1902     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1903                               KmpcCopyCtorTy, KmpcDtorTy};
1904     auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1905                                         /*isVarArg*/ false);
1906     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1907     break;
1908   }
1909   case OMPRTL__kmpc_end_critical: {
1910     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1911     // kmp_critical_name *crit);
1912     llvm::Type *TypeParams[] = {
1913         getIdentTyPointerTy(), CGM.Int32Ty,
1914         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1915     auto *FnTy =
1916         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1917     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1918     break;
1919   }
1920   case OMPRTL__kmpc_cancel_barrier: {
1921     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1922     // global_tid);
1923     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1924     auto *FnTy =
1925         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1926     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1927     break;
1928   }
1929   case OMPRTL__kmpc_barrier: {
1930     // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1931     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1932     auto *FnTy =
1933         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1934     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1935     break;
1936   }
1937   case OMPRTL__kmpc_for_static_fini: {
1938     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1939     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1940     auto *FnTy =
1941         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1942     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1943     break;
1944   }
1945   case OMPRTL__kmpc_push_num_threads: {
1946     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1947     // kmp_int32 num_threads)
1948     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1949                                 CGM.Int32Ty};
1950     auto *FnTy =
1951         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1952     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1953     break;
1954   }
1955   case OMPRTL__kmpc_serialized_parallel: {
1956     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1957     // global_tid);
1958     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1959     auto *FnTy =
1960         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1961     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1962     break;
1963   }
1964   case OMPRTL__kmpc_end_serialized_parallel: {
1965     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1966     // global_tid);
1967     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1968     auto *FnTy =
1969         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1970     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1971     break;
1972   }
1973   case OMPRTL__kmpc_flush: {
1974     // Build void __kmpc_flush(ident_t *loc);
1975     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1976     auto *FnTy =
1977         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1978     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1979     break;
1980   }
1981   case OMPRTL__kmpc_master: {
1982     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1983     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1984     auto *FnTy =
1985         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1986     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
1987     break;
1988   }
1989   case OMPRTL__kmpc_end_master: {
1990     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
1991     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1992     auto *FnTy =
1993         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1994     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
1995     break;
1996   }
1997   case OMPRTL__kmpc_omp_taskyield: {
1998     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
1999     // int end_part);
2000     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2001     auto *FnTy =
2002         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2003     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
2004     break;
2005   }
2006   case OMPRTL__kmpc_single: {
2007     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
2008     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2009     auto *FnTy =
2010         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2011     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
2012     break;
2013   }
2014   case OMPRTL__kmpc_end_single: {
2015     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
2016     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2017     auto *FnTy =
2018         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2019     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
2020     break;
2021   }
2022   case OMPRTL__kmpc_omp_task_alloc: {
2023     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
2024     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
2025     // kmp_routine_entry_t *task_entry);
2026     assert(KmpRoutineEntryPtrTy != nullptr &&
2027            "Type kmp_routine_entry_t must be created.");
2028     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2029                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
2030     // Return void * and then cast to particular kmp_task_t type.
2031     auto *FnTy =
2032         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2033     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
2034     break;
2035   }
2036   case OMPRTL__kmpc_omp_target_task_alloc: {
2037     // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32 gtid,
2038     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
2039     // kmp_routine_entry_t *task_entry, kmp_int64 device_id);
2040     assert(KmpRoutineEntryPtrTy != nullptr &&
2041            "Type kmp_routine_entry_t must be created.");
2042     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2043                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy,
2044                                 CGM.Int64Ty};
2045     // Return void * and then cast to particular kmp_task_t type.
2046     auto *FnTy =
2047         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2048     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_target_task_alloc");
2049     break;
2050   }
2051   case OMPRTL__kmpc_omp_task: {
2052     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2053     // *new_task);
2054     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2055                                 CGM.VoidPtrTy};
2056     auto *FnTy =
2057         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2058     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
2059     break;
2060   }
2061   case OMPRTL__kmpc_copyprivate: {
2062     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
2063     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
2064     // kmp_int32 didit);
2065     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2066     auto *CpyFnTy =
2067         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
2068     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
2069                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
2070                                 CGM.Int32Ty};
2071     auto *FnTy =
2072         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2073     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
2074     break;
2075   }
2076   case OMPRTL__kmpc_reduce: {
2077     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
2078     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
2079     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
2080     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2081     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
2082                                                /*isVarArg=*/false);
2083     llvm::Type *TypeParams[] = {
2084         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
2085         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
2086         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2087     auto *FnTy =
2088         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2089     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
2090     break;
2091   }
2092   case OMPRTL__kmpc_reduce_nowait: {
2093     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
2094     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
2095     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
2096     // *lck);
2097     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2098     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
2099                                                /*isVarArg=*/false);
2100     llvm::Type *TypeParams[] = {
2101         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
2102         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
2103         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2104     auto *FnTy =
2105         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2106     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
2107     break;
2108   }
2109   case OMPRTL__kmpc_end_reduce: {
2110     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
2111     // kmp_critical_name *lck);
2112     llvm::Type *TypeParams[] = {
2113         getIdentTyPointerTy(), CGM.Int32Ty,
2114         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2115     auto *FnTy =
2116         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2117     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
2118     break;
2119   }
2120   case OMPRTL__kmpc_end_reduce_nowait: {
2121     // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
2122     // kmp_critical_name *lck);
2123     llvm::Type *TypeParams[] = {
2124         getIdentTyPointerTy(), CGM.Int32Ty,
2125         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2126     auto *FnTy =
2127         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2128     RTLFn =
2129         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
2130     break;
2131   }
2132   case OMPRTL__kmpc_omp_task_begin_if0: {
2133     // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2134     // *new_task);
2135     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2136                                 CGM.VoidPtrTy};
2137     auto *FnTy =
2138         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2139     RTLFn =
2140         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
2141     break;
2142   }
2143   case OMPRTL__kmpc_omp_task_complete_if0: {
2144     // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2145     // *new_task);
2146     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2147                                 CGM.VoidPtrTy};
2148     auto *FnTy =
2149         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2150     RTLFn = CGM.CreateRuntimeFunction(FnTy,
2151                                       /*Name=*/"__kmpc_omp_task_complete_if0");
2152     break;
2153   }
2154   case OMPRTL__kmpc_ordered: {
2155     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
2156     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2157     auto *FnTy =
2158         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2159     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
2160     break;
2161   }
2162   case OMPRTL__kmpc_end_ordered: {
2163     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
2164     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2165     auto *FnTy =
2166         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2167     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
2168     break;
2169   }
2170   case OMPRTL__kmpc_omp_taskwait: {
2171     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
2172     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2173     auto *FnTy =
2174         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2175     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
2176     break;
2177   }
2178   case OMPRTL__kmpc_taskgroup: {
2179     // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
2180     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2181     auto *FnTy =
2182         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2183     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
2184     break;
2185   }
2186   case OMPRTL__kmpc_end_taskgroup: {
2187     // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
2188     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2189     auto *FnTy =
2190         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2191     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
2192     break;
2193   }
2194   case OMPRTL__kmpc_push_proc_bind: {
2195     // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
2196     // int proc_bind)
2197     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2198     auto *FnTy =
2199         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2200     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
2201     break;
2202   }
2203   case OMPRTL__kmpc_omp_task_with_deps: {
2204     // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
2205     // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
2206     // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
2207     llvm::Type *TypeParams[] = {
2208         getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
2209         CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
2210     auto *FnTy =
2211         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2212     RTLFn =
2213         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
2214     break;
2215   }
2216   case OMPRTL__kmpc_omp_wait_deps: {
2217     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
2218     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
2219     // kmp_depend_info_t *noalias_dep_list);
2220     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2221                                 CGM.Int32Ty,           CGM.VoidPtrTy,
2222                                 CGM.Int32Ty,           CGM.VoidPtrTy};
2223     auto *FnTy =
2224         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2225     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
2226     break;
2227   }
2228   case OMPRTL__kmpc_cancellationpoint: {
2229     // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
2230     // global_tid, kmp_int32 cncl_kind)
2231     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2232     auto *FnTy =
2233         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2234     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
2235     break;
2236   }
2237   case OMPRTL__kmpc_cancel: {
2238     // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
2239     // kmp_int32 cncl_kind)
2240     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2241     auto *FnTy =
2242         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2243     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
2244     break;
2245   }
2246   case OMPRTL__kmpc_push_num_teams: {
2247     // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid,
2248     // kmp_int32 num_teams, kmp_int32 num_threads)
2249     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2250         CGM.Int32Ty};
2251     auto *FnTy =
2252         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2253     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
2254     break;
2255   }
2256   case OMPRTL__kmpc_fork_teams: {
2257     // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
2258     // microtask, ...);
2259     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2260                                 getKmpc_MicroPointerTy()};
2261     auto *FnTy =
2262         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
2263     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
2264     if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
2265       if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
2266         llvm::LLVMContext &Ctx = F->getContext();
2267         llvm::MDBuilder MDB(Ctx);
2268         // Annotate the callback behavior of the __kmpc_fork_teams:
2269         //  - The callback callee is argument number 2 (microtask).
2270         //  - The first two arguments of the callback callee are unknown (-1).
2271         //  - All variadic arguments to the __kmpc_fork_teams are passed to the
2272         //    callback callee.
2273         F->addMetadata(
2274             llvm::LLVMContext::MD_callback,
2275             *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
2276                                         2, {-1, -1},
2277                                         /* VarArgsArePassed */ true)}));
2278       }
2279     }
2280     break;
2281   }
2282   case OMPRTL__kmpc_taskloop: {
2283     // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
2284     // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
2285     // sched, kmp_uint64 grainsize, void *task_dup);
2286     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2287                                 CGM.IntTy,
2288                                 CGM.VoidPtrTy,
2289                                 CGM.IntTy,
2290                                 CGM.Int64Ty->getPointerTo(),
2291                                 CGM.Int64Ty->getPointerTo(),
2292                                 CGM.Int64Ty,
2293                                 CGM.IntTy,
2294                                 CGM.IntTy,
2295                                 CGM.Int64Ty,
2296                                 CGM.VoidPtrTy};
2297     auto *FnTy =
2298         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2299     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
2300     break;
2301   }
2302   case OMPRTL__kmpc_doacross_init: {
2303     // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
2304     // num_dims, struct kmp_dim *dims);
2305     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2306                                 CGM.Int32Ty,
2307                                 CGM.Int32Ty,
2308                                 CGM.VoidPtrTy};
2309     auto *FnTy =
2310         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2311     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
2312     break;
2313   }
2314   case OMPRTL__kmpc_doacross_fini: {
2315     // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
2316     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2317     auto *FnTy =
2318         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2319     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
2320     break;
2321   }
2322   case OMPRTL__kmpc_doacross_post: {
2323     // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
2324     // *vec);
2325     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2326                                 CGM.Int64Ty->getPointerTo()};
2327     auto *FnTy =
2328         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2329     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
2330     break;
2331   }
2332   case OMPRTL__kmpc_doacross_wait: {
2333     // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
2334     // *vec);
2335     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2336                                 CGM.Int64Ty->getPointerTo()};
2337     auto *FnTy =
2338         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2339     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
2340     break;
2341   }
2342   case OMPRTL__kmpc_taskred_init: {
2343     // Build void *__kmpc_taskred_init(int gtid, int num_data, void *data);
2344     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
2345     auto *FnTy =
2346         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2347     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskred_init");
2348     break;
2349   }
2350   case OMPRTL__kmpc_task_reduction_get_th_data: {
2351     // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
2352     // *d);
2353     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2354     auto *FnTy =
2355         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2356     RTLFn = CGM.CreateRuntimeFunction(
2357         FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
2358     break;
2359   }
2360   case OMPRTL__kmpc_taskred_modifier_init: {
2361     // Build void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
2362     // is_ws, int num_data, void *data);
2363     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.IntTy, CGM.IntTy,
2364                                 CGM.IntTy, CGM.VoidPtrTy};
2365     auto *FnTy =
2366         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2367     RTLFn = CGM.CreateRuntimeFunction(FnTy,
2368                                       /*Name=*/"__kmpc_taskred_modifier_init");
2369     break;
2370   }
2371   case OMPRTL__kmpc_task_reduction_modifier_fini: {
2372     // Build void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid,
2373     // int is_ws);
2374     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.IntTy, CGM.IntTy};
2375     auto *FnTy =
2376         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2377     RTLFn = CGM.CreateRuntimeFunction(
2378         FnTy,
2379         /*Name=*/"__kmpc_task_reduction_modifier_fini");
2380     break;
2381   }
2382   case OMPRTL__kmpc_alloc: {
2383     // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t
2384     // al); omp_allocator_handle_t type is void *.
2385     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy};
2386     auto *FnTy =
2387         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2388     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc");
2389     break;
2390   }
2391   case OMPRTL__kmpc_free: {
2392     // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t
2393     // al); omp_allocator_handle_t type is void *.
2394     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2395     auto *FnTy =
2396         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2397     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free");
2398     break;
2399   }
2400   case OMPRTL__kmpc_init_allocator: {
2401     // Build omp_allocator_handle_t __kmpc_init_allocator(int gtid,
2402     // omp_memspace_handle_t, int ntraits, omp_alloctrait_t traits[]);
2403     // omp_allocator_handle_t type is void*, omp_memspace_handle_t type is
2404     // void*.
2405     auto *FnTy = llvm::FunctionType::get(
2406         CGM.VoidPtrTy, {CGM.IntTy, CGM.VoidPtrTy, CGM.IntTy, CGM.VoidPtrTy},
2407         /*isVarArg=*/false);
2408     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_init_allocator");
2409     break;
2410   }
2411   case OMPRTL__kmpc_destroy_allocator: {
2412     // Build void __kmpc_destroy_allocator(int gtid, omp_allocator_handle_t al);
2413     // omp_allocator_handle_t type is void*.
2414     auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, {CGM.IntTy, CGM.VoidPtrTy},
2415                                          /*isVarArg=*/false);
2416     RTLFn =
2417         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_destroy_allocator");
2418     break;
2419   }
2420   case OMPRTL__kmpc_push_target_tripcount: {
2421     // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
2422     // size);
2423     llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty};
2424     llvm::FunctionType *FnTy =
2425         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2426     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount");
2427     break;
2428   }
2429   case OMPRTL__tgt_target: {
2430     // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
2431     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2432     // *arg_types);
2433     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2434                                 CGM.VoidPtrTy,
2435                                 CGM.Int32Ty,
2436                                 CGM.VoidPtrPtrTy,
2437                                 CGM.VoidPtrPtrTy,
2438                                 CGM.Int64Ty->getPointerTo(),
2439                                 CGM.Int64Ty->getPointerTo()};
2440     auto *FnTy =
2441         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2442     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
2443     break;
2444   }
2445   case OMPRTL__tgt_target_nowait: {
2446     // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
2447     // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2448     // int64_t *arg_types);
2449     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2450                                 CGM.VoidPtrTy,
2451                                 CGM.Int32Ty,
2452                                 CGM.VoidPtrPtrTy,
2453                                 CGM.VoidPtrPtrTy,
2454                                 CGM.Int64Ty->getPointerTo(),
2455                                 CGM.Int64Ty->getPointerTo()};
2456     auto *FnTy =
2457         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2458     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
2459     break;
2460   }
2461   case OMPRTL__tgt_target_teams: {
2462     // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
2463     // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2464     // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2465     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2466                                 CGM.VoidPtrTy,
2467                                 CGM.Int32Ty,
2468                                 CGM.VoidPtrPtrTy,
2469                                 CGM.VoidPtrPtrTy,
2470                                 CGM.Int64Ty->getPointerTo(),
2471                                 CGM.Int64Ty->getPointerTo(),
2472                                 CGM.Int32Ty,
2473                                 CGM.Int32Ty};
2474     auto *FnTy =
2475         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2476     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
2477     break;
2478   }
2479   case OMPRTL__tgt_target_teams_nowait: {
2480     // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
2481     // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
2482     // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2483     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2484                                 CGM.VoidPtrTy,
2485                                 CGM.Int32Ty,
2486                                 CGM.VoidPtrPtrTy,
2487                                 CGM.VoidPtrPtrTy,
2488                                 CGM.Int64Ty->getPointerTo(),
2489                                 CGM.Int64Ty->getPointerTo(),
2490                                 CGM.Int32Ty,
2491                                 CGM.Int32Ty};
2492     auto *FnTy =
2493         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2494     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
2495     break;
2496   }
2497   case OMPRTL__tgt_register_requires: {
2498     // Build void __tgt_register_requires(int64_t flags);
2499     llvm::Type *TypeParams[] = {CGM.Int64Ty};
2500     auto *FnTy =
2501         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2502     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires");
2503     break;
2504   }
2505   case OMPRTL__tgt_target_data_begin: {
2506     // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
2507     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2508     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2509                                 CGM.Int32Ty,
2510                                 CGM.VoidPtrPtrTy,
2511                                 CGM.VoidPtrPtrTy,
2512                                 CGM.Int64Ty->getPointerTo(),
2513                                 CGM.Int64Ty->getPointerTo()};
2514     auto *FnTy =
2515         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2516     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
2517     break;
2518   }
2519   case OMPRTL__tgt_target_data_begin_nowait: {
2520     // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
2521     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2522     // *arg_types);
2523     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2524                                 CGM.Int32Ty,
2525                                 CGM.VoidPtrPtrTy,
2526                                 CGM.VoidPtrPtrTy,
2527                                 CGM.Int64Ty->getPointerTo(),
2528                                 CGM.Int64Ty->getPointerTo()};
2529     auto *FnTy =
2530         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2531     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
2532     break;
2533   }
2534   case OMPRTL__tgt_target_data_end: {
2535     // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
2536     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2537     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2538                                 CGM.Int32Ty,
2539                                 CGM.VoidPtrPtrTy,
2540                                 CGM.VoidPtrPtrTy,
2541                                 CGM.Int64Ty->getPointerTo(),
2542                                 CGM.Int64Ty->getPointerTo()};
2543     auto *FnTy =
2544         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2545     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
2546     break;
2547   }
2548   case OMPRTL__tgt_target_data_end_nowait: {
2549     // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
2550     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2551     // *arg_types);
2552     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2553                                 CGM.Int32Ty,
2554                                 CGM.VoidPtrPtrTy,
2555                                 CGM.VoidPtrPtrTy,
2556                                 CGM.Int64Ty->getPointerTo(),
2557                                 CGM.Int64Ty->getPointerTo()};
2558     auto *FnTy =
2559         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2560     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
2561     break;
2562   }
2563   case OMPRTL__tgt_target_data_update: {
2564     // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
2565     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2566     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2567                                 CGM.Int32Ty,
2568                                 CGM.VoidPtrPtrTy,
2569                                 CGM.VoidPtrPtrTy,
2570                                 CGM.Int64Ty->getPointerTo(),
2571                                 CGM.Int64Ty->getPointerTo()};
2572     auto *FnTy =
2573         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2574     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
2575     break;
2576   }
2577   case OMPRTL__tgt_target_data_update_nowait: {
2578     // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
2579     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2580     // *arg_types);
2581     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2582                                 CGM.Int32Ty,
2583                                 CGM.VoidPtrPtrTy,
2584                                 CGM.VoidPtrPtrTy,
2585                                 CGM.Int64Ty->getPointerTo(),
2586                                 CGM.Int64Ty->getPointerTo()};
2587     auto *FnTy =
2588         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2589     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
2590     break;
2591   }
2592   case OMPRTL__tgt_mapper_num_components: {
2593     // Build int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
2594     llvm::Type *TypeParams[] = {CGM.VoidPtrTy};
2595     auto *FnTy =
2596         llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false);
2597     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_mapper_num_components");
2598     break;
2599   }
2600   case OMPRTL__tgt_push_mapper_component: {
2601     // Build void __tgt_push_mapper_component(void *rt_mapper_handle, void
2602     // *base, void *begin, int64_t size, int64_t type);
2603     llvm::Type *TypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.VoidPtrTy,
2604                                 CGM.Int64Ty, CGM.Int64Ty};
2605     auto *FnTy =
2606         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2607     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_push_mapper_component");
2608     break;
2609   }
2610   case OMPRTL__kmpc_task_allow_completion_event: {
2611     // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
2612     // int gtid, kmp_task_t *task);
2613     auto *FnTy = llvm::FunctionType::get(
2614         CGM.VoidPtrTy, {getIdentTyPointerTy(), CGM.IntTy, CGM.VoidPtrTy},
2615         /*isVarArg=*/false);
2616     RTLFn =
2617         CGM.CreateRuntimeFunction(FnTy, "__kmpc_task_allow_completion_event");
2618     break;
2619   }
2620   }
2621   assert(RTLFn && "Unable to find OpenMP runtime function");
2622   return RTLFn;
2623 }
2624 
2625 llvm::FunctionCallee
2626 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
2627   assert((IVSize == 32 || IVSize == 64) &&
2628          "IV size is not compatible with the omp runtime");
2629   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
2630                                             : "__kmpc_for_static_init_4u")
2631                                 : (IVSigned ? "__kmpc_for_static_init_8"
2632                                             : "__kmpc_for_static_init_8u");
2633   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2634   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2635   llvm::Type *TypeParams[] = {
2636     getIdentTyPointerTy(),                     // loc
2637     CGM.Int32Ty,                               // tid
2638     CGM.Int32Ty,                               // schedtype
2639     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2640     PtrTy,                                     // p_lower
2641     PtrTy,                                     // p_upper
2642     PtrTy,                                     // p_stride
2643     ITy,                                       // incr
2644     ITy                                        // chunk
2645   };
2646   auto *FnTy =
2647       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2648   return CGM.CreateRuntimeFunction(FnTy, Name);
2649 }
2650 
2651 llvm::FunctionCallee
2652 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
2653   assert((IVSize == 32 || IVSize == 64) &&
2654          "IV size is not compatible with the omp runtime");
2655   StringRef Name =
2656       IVSize == 32
2657           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
2658           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
2659   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2660   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
2661                                CGM.Int32Ty,           // tid
2662                                CGM.Int32Ty,           // schedtype
2663                                ITy,                   // lower
2664                                ITy,                   // upper
2665                                ITy,                   // stride
2666                                ITy                    // chunk
2667   };
2668   auto *FnTy =
2669       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2670   return CGM.CreateRuntimeFunction(FnTy, Name);
2671 }
2672 
2673 llvm::FunctionCallee
2674 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
2675   assert((IVSize == 32 || IVSize == 64) &&
2676          "IV size is not compatible with the omp runtime");
2677   StringRef Name =
2678       IVSize == 32
2679           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
2680           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
2681   llvm::Type *TypeParams[] = {
2682       getIdentTyPointerTy(), // loc
2683       CGM.Int32Ty,           // tid
2684   };
2685   auto *FnTy =
2686       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2687   return CGM.CreateRuntimeFunction(FnTy, Name);
2688 }
2689 
2690 llvm::FunctionCallee
2691 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
2692   assert((IVSize == 32 || IVSize == 64) &&
2693          "IV size is not compatible with the omp runtime");
2694   StringRef Name =
2695       IVSize == 32
2696           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
2697           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
2698   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2699   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2700   llvm::Type *TypeParams[] = {
2701     getIdentTyPointerTy(),                     // loc
2702     CGM.Int32Ty,                               // tid
2703     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2704     PtrTy,                                     // p_lower
2705     PtrTy,                                     // p_upper
2706     PtrTy                                      // p_stride
2707   };
2708   auto *FnTy =
2709       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2710   return CGM.CreateRuntimeFunction(FnTy, Name);
2711 }
2712 
2713 /// Obtain information that uniquely identifies a target entry. This
2714 /// consists of the file and device IDs as well as line number associated with
2715 /// the relevant entry source location.
2716 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
2717                                      unsigned &DeviceID, unsigned &FileID,
2718                                      unsigned &LineNum) {
2719   SourceManager &SM = C.getSourceManager();
2720 
2721   // The loc should be always valid and have a file ID (the user cannot use
2722   // #pragma directives in macros)
2723 
2724   assert(Loc.isValid() && "Source location is expected to be always valid.");
2725 
2726   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
2727   assert(PLoc.isValid() && "Source location is expected to be always valid.");
2728 
2729   llvm::sys::fs::UniqueID ID;
2730   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
2731     SM.getDiagnostics().Report(diag::err_cannot_open_file)
2732         << PLoc.getFilename() << EC.message();
2733 
2734   DeviceID = ID.getDevice();
2735   FileID = ID.getFile();
2736   LineNum = PLoc.getLine();
2737 }
2738 
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  // Under -fopenmp-simd no offloading machinery is emitted, so there is no
  // reference pointer to hand back.
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // Only 'declare target link' variables — or 'declare target to' variables
  // when unified shared memory is required — are accessed through an extra
  // indirection pointer; all other cases fall through to invalid.
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    // Build the pointer's name: "<mangled-var>[_<fileid>]_decl_tgt_ref_ptr".
    // The hex file id is appended for internal-linkage variables to keep the
    // name unique across translation units.
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    // Create the pointer variable on first use and register it with the
    // offloading machinery; later uses just look it up by name.
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // On the host the pointer is initialized with the variable's own
      // address; on the device the runtime fills it in at load time.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
2777 
2778 llvm::Constant *
2779 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
2780   assert(!CGM.getLangOpts().OpenMPUseTLS ||
2781          !CGM.getContext().getTargetInfo().isTLSSupported());
2782   // Lookup the entry, lazily creating it if necessary.
2783   std::string Suffix = getName({"cache", ""});
2784   return getOrCreateInternalVariable(
2785       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
2786 }
2787 
2788 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
2789                                                 const VarDecl *VD,
2790                                                 Address VDAddr,
2791                                                 SourceLocation Loc) {
2792   if (CGM.getLangOpts().OpenMPUseTLS &&
2793       CGM.getContext().getTargetInfo().isTLSSupported())
2794     return VDAddr;
2795 
2796   llvm::Type *VarTy = VDAddr.getElementType();
2797   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2798                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2799                                                        CGM.Int8PtrTy),
2800                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
2801                          getOrCreateThreadPrivateCache(VD)};
2802   return Address(CGF.EmitRuntimeCall(
2803       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2804                  VDAddr.getAlignment());
2805 }
2806 
2807 void CGOpenMPRuntime::emitThreadPrivateVarInit(
2808     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
2809     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
2810   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
2811   // library.
2812   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
2813   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
2814                       OMPLoc);
2815   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
2816   // to register constructor/destructor for variable.
2817   llvm::Value *Args[] = {
2818       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
2819       Ctor, CopyCtor, Dtor};
2820   CGF.EmitRuntimeCall(
2821       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
2822 }
2823 
// Emit the ctor/cctor/dtor registration for a threadprivate variable. When
// called outside a function context (CGF == nullptr) the registration is
// wrapped in a new global init function, which is returned for the caller to
// schedule; otherwise the registration is emitted inline and nullptr is
// returned.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // With TLS-based threadprivate the runtime registration is not needed:
  // the variable is emitted as an ordinary thread_local global instead.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  // Emit the registration at most once, for the variable's definition.
  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      // The ctor has signature 'void *(void *dst)': it initializes the
      // incoming threadprivate copy and returns its address.
      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      // Reinterpret the incoming void* as a pointer to the variable's type
      // and run the initializer into it.
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Return the destination pointer, as the runtime contract requires.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      // The dtor has signature 'void (void *dst)'.
      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      // No constructor needed: pass a typed null function pointer instead.
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      // No destructor needed: pass a typed null function pointer instead.
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function: wrap the registration call in a fresh global
      // initialization function and hand it back for emission.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration inline into the current function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
2943 
// Emit the offload-entry ctor/dtor bookkeeping for a 'declare target'
// variable. Returns true when compiling for the device, i.e. when the caller
// must not emit the host-side definition itself.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // No target triples and not a device compile: offloading is not involved.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // 'link' variables — and 'to' variables under unified shared memory — are
  // accessed through a reference pointer and get no ctor/dtor entries here.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Emit the offload entries at most once per variable.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the ctor alive: nothing in the module references it directly.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // Host side: only a unique ID (a dummy private constant) is needed to
      // pair with the device's constructor entry.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the dtor alive: nothing in the module references it directly.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host side: dummy private constant serving as the matching unique ID.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
3058 
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  // Creates (or reuses) a compiler-generated global, tagged with the
  // "artificial" suffix, and returns the calling thread's private copy of it.
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  // When TLS is available, simply mark the global thread_local and use its
  // address directly — no runtime call needed.
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  // Otherwise go through the runtime:
  //   __kmpc_threadprivate_cached(&loc, tid, &var, size, &cache)
  // using a per-variable cache global (with an extra "cache" suffix), and
  // cast the returned pointer back to the variable's type.
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}
3087 
3088 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
3089                                    const RegionCodeGenTy &ThenGen,
3090                                    const RegionCodeGenTy &ElseGen) {
3091   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
3092 
3093   // If the condition constant folds and can be elided, try to avoid emitting
3094   // the condition and the dead arm of the if/else.
3095   bool CondConstant;
3096   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
3097     if (CondConstant)
3098       ThenGen(CGF);
3099     else
3100       ElseGen(CGF);
3101     return;
3102   }
3103 
3104   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
3105   // emit the conditional branch.
3106   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
3107   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
3108   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
3109   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
3110 
3111   // Emit the 'then' code.
3112   CGF.EmitBlock(ThenBlock);
3113   ThenGen(CGF);
3114   CGF.EmitBranch(ContBlock);
3115   // Emit the 'else' code if present.
3116   // There is no need to emit line number for unconditional branch.
3117   (void)ApplyDebugLocation::CreateEmpty(CGF);
3118   CGF.EmitBlock(ElseBlock);
3119   ElseGen(CGF);
3120   // There is no need to emit line number for unconditional branch.
3121   (void)ApplyDebugLocation::CreateEmpty(CGF);
3122   CGF.EmitBranch(ContBlock);
3123   // Emit the continuation block for code after the if.
3124   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
3125 }
3126 
// Emit a call to an outlined 'parallel' region. With no if-clause (or a true
// one) the region is forked via __kmpc_fork_call; a false if-clause runs the
// outlined function serially between (begin|end)_serialized_parallel calls.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  // Parallel path: hand the outlined function to the runtime for forking.
  auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    // The captured variables are appended as the runtime call's varargs.
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // Serial path: run the outlined function directly on the current thread,
  // bracketed by the serialized-parallel runtime calls.
  auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
                                                          PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
        EndArgs);
  };
  // With an if-clause both arms are emitted under a runtime branch;
  // otherwise only the parallel path is generated.
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
3186 
3187 // If we're inside an (outlined) parallel region, use the region info's
3188 // thread-ID variable (it is passed in a first argument of the outlined function
3189 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
3190 // regular serial code region, get thread ID by calling kmp_int32
3191 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
3192 // return the address of that temp.
3193 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
3194                                              SourceLocation Loc) {
3195   if (auto *OMPRegionInfo =
3196           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3197     if (OMPRegionInfo->getThreadIDVariable())
3198       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
3199 
3200   llvm::Value *ThreadID = getThreadID(CGF, Loc);
3201   QualType Int32Ty =
3202       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
3203   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
3204   CGF.EmitStoreOfScalar(ThreadID,
3205                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
3206 
3207   return ThreadIDTemp;
3208 }
3209 
3210 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
3211     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
3212   SmallString<256> Buffer;
3213   llvm::raw_svector_ostream Out(Buffer);
3214   Out << Name;
3215   StringRef RuntimeName = Out.str();
3216   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
3217   if (Elem.second) {
3218     assert(Elem.second->getType()->getPointerElementType() == Ty &&
3219            "OMP internal variable has different type than requested");
3220     return &*Elem.second;
3221   }
3222 
3223   return Elem.second = new llvm::GlobalVariable(
3224              CGM.getModule(), Ty, /*IsConstant*/ false,
3225              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
3226              Elem.first(), /*InsertBefore=*/nullptr,
3227              llvm::GlobalValue::NotThreadLocal, AddressSpace);
3228 }
3229 
3230 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
3231   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
3232   std::string Name = getName({Prefix, "var"});
3233   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
3234 }
3235 
3236 namespace {
3237 /// Common pre(post)-action for different OpenMP constructs.
3238 class CommonActionTy final : public PrePostActionTy {
3239   llvm::FunctionCallee EnterCallee;
3240   ArrayRef<llvm::Value *> EnterArgs;
3241   llvm::FunctionCallee ExitCallee;
3242   ArrayRef<llvm::Value *> ExitArgs;
3243   bool Conditional;
3244   llvm::BasicBlock *ContBlock = nullptr;
3245 
3246 public:
3247   CommonActionTy(llvm::FunctionCallee EnterCallee,
3248                  ArrayRef<llvm::Value *> EnterArgs,
3249                  llvm::FunctionCallee ExitCallee,
3250                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
3251       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
3252         ExitArgs(ExitArgs), Conditional(Conditional) {}
3253   void Enter(CodeGenFunction &CGF) override {
3254     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
3255     if (Conditional) {
3256       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
3257       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
3258       ContBlock = CGF.createBasicBlock("omp_if.end");
3259       // Generate the branch (If-stmt)
3260       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
3261       CGF.EmitBlock(ThenBlock);
3262     }
3263   }
3264   void Done(CodeGenFunction &CGF) {
3265     // Emit the rest of blocks/branches
3266     CGF.EmitBranch(ContBlock);
3267     CGF.EmitBlock(ContBlock, true);
3268   }
3269   void Exit(CodeGenFunction &CGF) override {
3270     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
3271   }
3272 };
3273 } // anonymous namespace
3274 
3275 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
3276                                          StringRef CriticalName,
3277                                          const RegionCodeGenTy &CriticalOpGen,
3278                                          SourceLocation Loc, const Expr *Hint) {
3279   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
3280   // CriticalOpGen();
3281   // __kmpc_end_critical(ident_t *, gtid, Lock);
3282   // Prepare arguments and build a call to __kmpc_critical
3283   if (!CGF.HaveInsertPoint())
3284     return;
3285   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3286                          getCriticalRegionLock(CriticalName)};
3287   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
3288                                                 std::end(Args));
3289   if (Hint) {
3290     EnterArgs.push_back(CGF.Builder.CreateIntCast(
3291         CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
3292   }
3293   CommonActionTy Action(
3294       createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
3295                                  : OMPRTL__kmpc_critical),
3296       EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
3297   CriticalOpGen.setAction(Action);
3298   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
3299 }
3300 
3301 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
3302                                        const RegionCodeGenTy &MasterOpGen,
3303                                        SourceLocation Loc) {
3304   if (!CGF.HaveInsertPoint())
3305     return;
3306   // if(__kmpc_master(ident_t *, gtid)) {
3307   //   MasterOpGen();
3308   //   __kmpc_end_master(ident_t *, gtid);
3309   // }
3310   // Prepare arguments and build a call to __kmpc_master
3311   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3312   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
3313                         createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
3314                         /*Conditional=*/true);
3315   MasterOpGen.setAction(Action);
3316   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
3317   Action.Done(CGF);
3318 }
3319 
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  // Emit a 'taskyield' point. Uses the OpenMPIRBuilder when one is
  // configured; otherwise emits the __kmpc_omp_taskyield runtime call
  // directly.
  if (!CGF.HaveInsertPoint())
    return;
  llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
  if (OMPBuilder) {
    OMPBuilder->CreateTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield),
                        Args);
  }

  // If we are inside an OpenMP region, emit the untied-task switch point so
  // an untied task can be resumed after the yield.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
3339 
3340 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
3341                                           const RegionCodeGenTy &TaskgroupOpGen,
3342                                           SourceLocation Loc) {
3343   if (!CGF.HaveInsertPoint())
3344     return;
3345   // __kmpc_taskgroup(ident_t *, gtid);
3346   // TaskgroupOpGen();
3347   // __kmpc_end_taskgroup(ident_t *, gtid);
3348   // Prepare arguments and build a call to __kmpc_taskgroup
3349   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3350   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
3351                         createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
3352                         Args);
3353   TaskgroupOpGen.setAction(Action);
3354   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
3355 }
3356 
/// Given an array of pointers to variables, project the address of a
/// given variable.
///
/// \param Array Address of a 'void *[n]'-style array of variable pointers.
/// \param Index Slot in \p Array to read.
/// \param Var   Declaration of the variable; supplies alignment and the
///              memory type for the returned address.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  // Rebuild an Address with the variable's natural alignment and cast the
  // loaded pointer back to the variable's memory type.
  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
  Addr = CGF.Builder.CreateElementBitCast(
      Addr, CGF.ConvertTypeForMem(Var->getType()));
  return Addr;
}
3370 
/// Emit the internal helper passed to __kmpc_copyprivate:
///   void copy_func(void *LHSArg, void *RHSArg);
/// Both arguments are arrays of n 'void *' slots (one per copyprivate
/// variable, see \p ArgsType); element I of the LHS array is assigned from
/// element I of the RHS array using AssignmentOps[I].
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  // Internal linkage: the helper is only referenced by the
  // __kmpc_copyprivate call emitted in this module.
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    // Copy with the declared type of the copyprivate variable so
    // array/aggregate assignments are handled correctly.
    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
3424 
/// Emit a 'single' region, optionally followed by the copyprivate broadcast:
///   int32 did_it = 0;
///   if (__kmpc_single(ident_t *, gtid)) {
///     SingleOpGen();
///     __kmpc_end_single(ident_t *, gtid);
///     did_it = 1;
///   }
///   // Only when copyprivate variables are present:
///   __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
///                      <copy_func>, did_it);
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    // Flag telling the runtime which thread executed the single region; it
    // is read later by __kmpc_copyprivate on every thread.
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    // Still inside the conditional region, so only the executing thread
    // sets the flag.
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
  }
}
3506 
3507 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
3508                                         const RegionCodeGenTy &OrderedOpGen,
3509                                         SourceLocation Loc, bool IsThreads) {
3510   if (!CGF.HaveInsertPoint())
3511     return;
3512   // __kmpc_ordered(ident_t *, gtid);
3513   // OrderedOpGen();
3514   // __kmpc_end_ordered(ident_t *, gtid);
3515   // Prepare arguments and build a call to __kmpc_ordered
3516   if (IsThreads) {
3517     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3518     CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
3519                           createRuntimeFunction(OMPRTL__kmpc_end_ordered),
3520                           Args);
3521     OrderedOpGen.setAction(Action);
3522     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3523     return;
3524   }
3525   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3526 }
3527 
3528 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
3529   unsigned Flags;
3530   if (Kind == OMPD_for)
3531     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
3532   else if (Kind == OMPD_sections)
3533     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
3534   else if (Kind == OMPD_single)
3535     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
3536   else if (Kind == OMPD_barrier)
3537     Flags = OMP_IDENT_BARRIER_EXPL;
3538   else
3539     Flags = OMP_IDENT_BARRIER_IMPL;
3540   return Flags;
3541 }
3542 
/// Choose a default schedule/chunk for the given loop directive. For a
/// doacross loop (an 'ordered' clause with a loop count) force
/// 'schedule(static, 1)'; otherwise \p ScheduleKind and \p ChunkExpr are left
/// untouched.
void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose static, 1 schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), llvm::APInt(32, 1),
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}
3560 
/// Emit an explicit or implicit barrier.
///
/// \param Kind Directive the barrier belongs to; selects the ident_t flags.
/// \param EmitChecks When true and the region is cancellable, emit the
///        exit-on-cancellation check after the barrier.
/// \param ForceSimpleCall When true, always emit a plain __kmpc_barrier even
///        inside a cancellable region.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
  if (OMPBuilder) {
    CGF.Builder.restoreIP(OMPBuilder->CreateBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      // Cancellable region: the barrier call reports whether cancellation
      // was requested.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
}
3607 
3608 /// Map the OpenMP loop schedule to the runtime enumeration.
3609 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
3610                                           bool Chunked, bool Ordered) {
3611   switch (ScheduleKind) {
3612   case OMPC_SCHEDULE_static:
3613     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
3614                    : (Ordered ? OMP_ord_static : OMP_sch_static);
3615   case OMPC_SCHEDULE_dynamic:
3616     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
3617   case OMPC_SCHEDULE_guided:
3618     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
3619   case OMPC_SCHEDULE_runtime:
3620     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
3621   case OMPC_SCHEDULE_auto:
3622     return Ordered ? OMP_ord_auto : OMP_sch_auto;
3623   case OMPC_SCHEDULE_unknown:
3624     assert(!Chunked && "chunk was specified but schedule kind not known");
3625     return Ordered ? OMP_ord_static : OMP_sch_static;
3626   }
3627   llvm_unreachable("Unexpected runtime schedule");
3628 }
3629 
3630 /// Map the OpenMP distribute schedule to the runtime enumeration.
3631 static OpenMPSchedType
3632 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
3633   // only static is allowed for dist_schedule
3634   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
3635 }
3636 
3637 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
3638                                          bool Chunked) const {
3639   OpenMPSchedType Schedule =
3640       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3641   return Schedule == OMP_sch_static;
3642 }
3643 
3644 bool CGOpenMPRuntime::isStaticNonchunked(
3645     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3646   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3647   return Schedule == OMP_dist_sch_static;
3648 }
3649 
3650 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
3651                                       bool Chunked) const {
3652   OpenMPSchedType Schedule =
3653       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3654   return Schedule == OMP_sch_static_chunked;
3655 }
3656 
3657 bool CGOpenMPRuntime::isStaticChunked(
3658     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3659   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3660   return Schedule == OMP_dist_sch_static_chunked;
3661 }
3662 
3663 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
3664   OpenMPSchedType Schedule =
3665       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
3666   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
3667   return Schedule != OMP_sch_static;
3668 }
3669 
3670 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
3671                                   OpenMPScheduleClauseModifier M1,
3672                                   OpenMPScheduleClauseModifier M2) {
3673   int Modifier = 0;
3674   switch (M1) {
3675   case OMPC_SCHEDULE_MODIFIER_monotonic:
3676     Modifier = OMP_sch_modifier_monotonic;
3677     break;
3678   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3679     Modifier = OMP_sch_modifier_nonmonotonic;
3680     break;
3681   case OMPC_SCHEDULE_MODIFIER_simd:
3682     if (Schedule == OMP_sch_static_chunked)
3683       Schedule = OMP_sch_static_balanced_chunked;
3684     break;
3685   case OMPC_SCHEDULE_MODIFIER_last:
3686   case OMPC_SCHEDULE_MODIFIER_unknown:
3687     break;
3688   }
3689   switch (M2) {
3690   case OMPC_SCHEDULE_MODIFIER_monotonic:
3691     Modifier = OMP_sch_modifier_monotonic;
3692     break;
3693   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3694     Modifier = OMP_sch_modifier_nonmonotonic;
3695     break;
3696   case OMPC_SCHEDULE_MODIFIER_simd:
3697     if (Schedule == OMP_sch_static_chunked)
3698       Schedule = OMP_sch_static_balanced_chunked;
3699     break;
3700   case OMPC_SCHEDULE_MODIFIER_last:
3701   case OMPC_SCHEDULE_MODIFIER_unknown:
3702     break;
3703   }
3704   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
3705   // If the static schedule kind is specified or if the ordered clause is
3706   // specified, and if the nonmonotonic modifier is not specified, the effect is
3707   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
3708   // modifier is specified, the effect is as if the nonmonotonic modifier is
3709   // specified.
3710   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
3711     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
3712           Schedule == OMP_sch_static_balanced_chunked ||
3713           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
3714           Schedule == OMP_dist_sch_static_chunked ||
3715           Schedule == OMP_dist_sch_static))
3716       Modifier = OMP_sch_modifier_nonmonotonic;
3717   }
3718   return Schedule | Modifier;
3719 }
3720 
/// Emit the __kmpc_dispatch_init_* call that starts a dynamically scheduled
/// worksharing loop. \p DispatchValues carries the loop bounds and optional
/// chunk; \p IVSize/\p IVSigned select the 4/8-byte (un)signed runtime entry
/// point.
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  // Static non-ordered schedules must go through the static-init path, not
  // dispatch-init.
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}
3753 
/// Shared helper for emitting a __kmpc_for_static_init_* call for
/// worksharing-loop, sections, and distribute constructs. \p Values provides
/// the addresses of the bounds/stride/last-iteration variables the runtime
/// writes back into.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  // Ordered loops are handled via the dispatch API, never here.
  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}
3802 
/// Emit the static-init runtime call for a worksharing (loop- or
/// sections-based) directive.
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert(isOpenMPWorksharingDirective(DKind) &&
         "Expected loop-based or sections-based directive.");
  // Tag the location with the kind of workshared region (loop vs. sections).
  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
                                             isOpenMPLoopDirective(DKind)
                                                 ? OMP_IDENT_WORK_LOOP
                                                 : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}
3823 
/// Emit the static-init runtime call for a 'distribute' directive. Distribute
/// has no schedule modifiers, so both modifier slots are passed as 'unknown'.
void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}
3839 
/// Emit the __kmpc_for_static_fini call that closes a statically scheduled
/// region; the location flags encode whether it was a distribute, loop, or
/// sections region.
void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ? OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
                      Args);
}
3858 
/// Notify the runtime that the current ordered iteration is finished, so the
/// next iteration may enter its ordered region.
void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
}
3869 
/// Emit the __kmpc_dispatch_next_* call that fetches the next chunk of a
/// dynamically scheduled loop. The runtime writes the new bounds/stride into
/// \p IL, \p LB, \p UB, \p ST; the returned llvm::Value is the call result
/// converted to a boolean ("more work available").
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  // The runtime returns a kmp_int32; narrow it to the i1 the loop condition
  // expects.
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}
3893 
/// Emit __kmpc_push_num_threads for a 'num_threads' clause; \p NumThreads is
/// the already-evaluated clause expression, cast to i32.
void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
                      Args);
}
3906 
/// Emit __kmpc_push_proc_bind for a 'proc_bind' clause. The ProcBindKind
/// enumerator value is passed through to the runtime as an integer.
void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         ProcBindKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
}
3919 
3920 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
3921                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
3922   llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
3923   if (OMPBuilder) {
3924     OMPBuilder->CreateFlush(CGF.Builder);
3925   } else {
3926     if (!CGF.HaveInsertPoint())
3927       return;
3928     // Build call void __kmpc_flush(ident_t *loc)
3929     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
3930                         emitUpdateLocation(CGF, Loc));
3931   }
3932 }
3933 
namespace {
/// Indexes of fields for type kmp_task_t.
/// NOTE(review): the enumerator order is assumed to match the kmp_task_t
/// record layout built elsewhere in this file — confirm before reordering.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
3959 
3960 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3961   return OffloadEntriesTargetRegion.empty() &&
3962          OffloadEntriesDeviceGlobalVar.empty();
3963 }
3964 
/// Initialize target region entry.
/// Creates a placeholder entry (null address/ID) at the given position and
/// advances the global ordering counter; the real address/ID are filled in
/// later by registerTargetRegionEntryInfo.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                    StringRef ParentName, unsigned LineNum,
                                    unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
                                   OMPTargetRegionEntryTargetRegion);
  ++OffloadingEntriesNum;
}
3978 
/// Register a target region entry with its final address/ID/flags.
/// On the device, the entry must already have been initialized (a missing
/// entry is reported as an error); on the host a fresh entry is created and
/// the ordering counter advanced.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Unable to find target region on line '%0' in the device code.");
      CGM.getDiags().Report(DiagID) << LineNum;
      return;
    }
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    assert(Entry.isValid() && "Entry not initialized!");
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}
4006 
4007 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
4008     unsigned DeviceID, unsigned FileID, StringRef ParentName,
4009     unsigned LineNum) const {
4010   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
4011   if (PerDevice == OffloadEntriesTargetRegion.end())
4012     return false;
4013   auto PerFile = PerDevice->second.find(FileID);
4014   if (PerFile == PerDevice->second.end())
4015     return false;
4016   auto PerParentName = PerFile->second.find(ParentName);
4017   if (PerParentName == PerFile->second.end())
4018     return false;
4019   auto PerLine = PerParentName->second.find(LineNum);
4020   if (PerLine == PerParentName->second.end())
4021     return false;
4022   // Fail if this entry is already registered.
4023   if (PerLine->second.getAddress() || PerLine->second.getID())
4024     return false;
4025   return true;
4026 }
4027 
4028 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
4029     const OffloadTargetRegionEntryInfoActTy &Action) {
4030   // Scan all target region entries and perform the provided action.
4031   for (const auto &D : OffloadEntriesTargetRegion)
4032     for (const auto &F : D.second)
4033       for (const auto &P : F.second)
4034         for (const auto &L : P.second)
4035           Action(D.first, F.first, P.first(), L.first, L.second);
4036 }
4037 
// Record a placeholder entry for a declare-target global variable while
// loading the host IR metadata on the device side; the address, size and
// linkage are supplied later by registerDeviceGlobalVarEntryInfo().
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  // try_emplace keeps an existing entry for this name untouched.
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}
4048 
// Register (or complete) the offload entry for a declare-target global
// variable. On the device the entry was pre-initialized from the host IR
// metadata; on the host a new entry is created on first sight of the
// variable. Repeated registrations only fill in a still-missing size/linkage.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // NOTE: operator[] default-constructs a (invalid) entry if the name is
    // unknown; the assert below then fires in +Asserts builds.
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    assert(Entry.isValid() && Entry.getFlags() == Flags &&
           "Entry not initialized!");
    assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
           "Resetting with the new address.");
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      // Already fully registered: only fill in a size/linkage that was not
      // known at the first registration.
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      // Host side, repeated registration: same invariants as above, and only
      // a missing size/linkage may be filled in.
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
             "Resetting with the new address.");
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    // Host side, first registration: create a complete entry.
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}
4088 
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    actOnDeviceGlobalVarEntriesInfo(
        const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
  // Scan all device global variable entries and perform the provided action.
  for (const auto &E : OffloadEntriesDeviceGlobalVar)
    Action(E.getKey(), E.getValue());
}
4096 
4097 void CGOpenMPRuntime::createOffloadEntry(
4098     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
4099     llvm::GlobalValue::LinkageTypes Linkage) {
4100   StringRef Name = Addr->getName();
4101   llvm::Module &M = CGM.getModule();
4102   llvm::LLVMContext &C = M.getContext();
4103 
4104   // Create constant string with the name.
4105   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
4106 
4107   std::string StringName = getName({"omp_offloading", "entry_name"});
4108   auto *Str = new llvm::GlobalVariable(
4109       M, StrPtrInit->getType(), /*isConstant=*/true,
4110       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
4111   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
4112 
4113   llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
4114                             llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
4115                             llvm::ConstantInt::get(CGM.SizeTy, Size),
4116                             llvm::ConstantInt::get(CGM.Int32Ty, Flags),
4117                             llvm::ConstantInt::get(CGM.Int32Ty, 0)};
4118   std::string EntryName = getName({"omp_offloading", "entry", ""});
4119   llvm::GlobalVariable *Entry = createGlobalStruct(
4120       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
4121       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
4122 
4123   // The entry has to be created in the section the linker expects it to be.
4124   Entry->setSection("omp_offloading_entries");
4125 }
4126 
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for functions that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Both arrays are sized by the total number of entries and indexed by each
  // entry's Order, so entries land in creation order.
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Recover a source location for diagnostics by matching the
        // (DeviceID, FileID) pair against the files known to the
        // SourceManager.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Now walk the entries in creation order and emit the actual
  // __tgt_offload_entry descriptors, diagnosing incomplete entries.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
4300 
/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host IR into a throw-away context; only the named metadata is
  // read from it.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Helpers to decode the integer/string operands of one metadata node.
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 is the entry kind; the remaining operand layout matches the
    // emitters in createOffloadEntriesAndInfoMetadata().
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
4369 
4370 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
4371   if (!KmpRoutineEntryPtrTy) {
4372     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
4373     ASTContext &C = CGM.getContext();
4374     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
4375     FunctionProtoType::ExtProtoInfo EPI;
4376     KmpRoutineEntryPtrQTy = C.getPointerType(
4377         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
4378     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
4379   }
4380 }
4381 
4382 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
4383   // Make sure the type of the entry is already created. This is the type we
4384   // have to create:
4385   // struct __tgt_offload_entry{
4386   //   void      *addr;       // Pointer to the offload entry info.
4387   //                          // (function or global)
4388   //   char      *name;       // Name of the function or global.
4389   //   size_t     size;       // Size of the entry info (0 if it a function).
4390   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
4391   //   int32_t    reserved;   // Reserved, to use by the runtime library.
4392   // };
4393   if (TgtOffloadEntryQTy.isNull()) {
4394     ASTContext &C = CGM.getContext();
4395     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
4396     RD->startDefinition();
4397     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4398     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
4399     addFieldToRecordDecl(C, RD, C.getSizeType());
4400     addFieldToRecordDecl(
4401         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4402     addFieldToRecordDecl(
4403         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4404     RD->completeDefinition();
4405     RD->addAttr(PackedAttr::CreateImplicit(C));
4406     TgtOffloadEntryQTy = C.getRecordType(RD);
4407   }
4408   return TgtOffloadEntryQTy;
4409 }
4410 
namespace {
/// Bundles the declarations involved in privatizing one variable for a task:
/// the referencing expression, the original variable, its private copy, and
/// (for firstprivate) the initializer element.
struct PrivateHelpersTy {
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  const Expr *OriginalRef = nullptr;
  const VarDecl *Original = nullptr;
  const VarDecl *PrivateCopy = nullptr;
  const VarDecl *PrivateElemInit = nullptr;
};
/// A privatized variable paired with its required alignment.
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace
4424 
4425 static RecordDecl *
4426 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
4427   if (!Privates.empty()) {
4428     ASTContext &C = CGM.getContext();
4429     // Build struct .kmp_privates_t. {
4430     //         /*  private vars  */
4431     //       };
4432     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
4433     RD->startDefinition();
4434     for (const auto &Pair : Privates) {
4435       const VarDecl *VD = Pair.second.Original;
4436       QualType Type = VD->getType().getNonReferenceType();
4437       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
4438       if (VD->hasAttrs()) {
4439         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
4440              E(VD->getAttrs().end());
4441              I != E; ++I)
4442           FD->addAttr(*I);
4443       }
4444     }
4445     RD->completeDefinition();
4446     return RD;
4447   }
4448   return nullptr;
4449 }
4450 
4451 static RecordDecl *
4452 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
4453                          QualType KmpInt32Ty,
4454                          QualType KmpRoutineEntryPointerQTy) {
4455   ASTContext &C = CGM.getContext();
4456   // Build struct kmp_task_t {
4457   //         void *              shareds;
4458   //         kmp_routine_entry_t routine;
4459   //         kmp_int32           part_id;
4460   //         kmp_cmplrdata_t data1;
4461   //         kmp_cmplrdata_t data2;
4462   // For taskloops additional fields:
4463   //         kmp_uint64          lb;
4464   //         kmp_uint64          ub;
4465   //         kmp_int64           st;
4466   //         kmp_int32           liter;
4467   //         void *              reductions;
4468   //       };
4469   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
4470   UD->startDefinition();
4471   addFieldToRecordDecl(C, UD, KmpInt32Ty);
4472   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
4473   UD->completeDefinition();
4474   QualType KmpCmplrdataTy = C.getRecordType(UD);
4475   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
4476   RD->startDefinition();
4477   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4478   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
4479   addFieldToRecordDecl(C, RD, KmpInt32Ty);
4480   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4481   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4482   if (isOpenMPTaskLoopDirective(Kind)) {
4483     QualType KmpUInt64Ty =
4484         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
4485     QualType KmpInt64Ty =
4486         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
4487     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4488     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4489     addFieldToRecordDecl(C, RD, KmpInt64Ty);
4490     addFieldToRecordDecl(C, RD, KmpInt32Ty);
4491     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4492   }
4493   RD->completeDefinition();
4494   return RD;
4495 }
4496 
4497 static RecordDecl *
4498 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
4499                                      ArrayRef<PrivateDataTy> Privates) {
4500   ASTContext &C = CGM.getContext();
4501   // Build struct kmp_task_t_with_privates {
4502   //         kmp_task_t task_data;
4503   //         .kmp_privates_t. privates;
4504   //       };
4505   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
4506   RD->startDefinition();
4507   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
4508   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
4509     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
4510   RD->completeDefinition();
4511   return RD;
4512 }
4513 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Build the (kmp_int32 gtid, kmp_task_t_with_privates *tt) signature the
  // runtime invokes.
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // Dereference the tt pointer; Base is the embedded kmp_task_t (first field
  // of kmp_task_t_with_privates).
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address (the task function may update it for untied
  // tasks).
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates field (second field of the wrapper record) exists only when
  // the task has private variables; otherwise pass a null pointer.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloops additionally receive lb, ub, st, liter and the reduction data
    // loaded from the kmp_task_t fields.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The proxy always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
4628 
// Emit the task destructor thunk (.omp_task_destructor.): a function with the
// same (gtid, kmp_task_t_with_privates *) signature as the task entry that
// runs the destructors of all destructible fields of the privates record.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  // Dereference the task pointer and step to the privates record (the second
  // field of kmp_task_t_with_privates).
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  // Push a destroy cleanup for every field whose type requires destruction;
  // the cleanups run when the function finishes.
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
4677 
4678 /// Emit a privates mapping function for correct handling of private and
4679 /// firstprivate variables.
4680 /// \code
4681 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
4682 /// **noalias priv1,...,  <tyn> **noalias privn) {
4683 ///   *priv1 = &.privates.priv1;
4684 ///   ...;
4685 ///   *privn = &.privates.privn;
4686 /// }
4687 /// \endcode
4688 static llvm::Value *
4689 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
4690                                ArrayRef<const Expr *> PrivateVars,
4691                                ArrayRef<const Expr *> FirstprivateVars,
4692                                ArrayRef<const Expr *> LastprivateVars,
4693                                QualType PrivatesQTy,
4694                                ArrayRef<PrivateDataTy> Privates) {
4695   ASTContext &C = CGM.getContext();
4696   FunctionArgList Args;
4697   ImplicitParamDecl TaskPrivatesArg(
4698       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4699       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
4700       ImplicitParamDecl::Other);
4701   Args.push_back(&TaskPrivatesArg);
4702   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
4703   unsigned Counter = 1;
4704   for (const Expr *E : PrivateVars) {
4705     Args.push_back(ImplicitParamDecl::Create(
4706         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4707         C.getPointerType(C.getPointerType(E->getType()))
4708             .withConst()
4709             .withRestrict(),
4710         ImplicitParamDecl::Other));
4711     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4712     PrivateVarsPos[VD] = Counter;
4713     ++Counter;
4714   }
4715   for (const Expr *E : FirstprivateVars) {
4716     Args.push_back(ImplicitParamDecl::Create(
4717         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4718         C.getPointerType(C.getPointerType(E->getType()))
4719             .withConst()
4720             .withRestrict(),
4721         ImplicitParamDecl::Other));
4722     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4723     PrivateVarsPos[VD] = Counter;
4724     ++Counter;
4725   }
4726   for (const Expr *E : LastprivateVars) {
4727     Args.push_back(ImplicitParamDecl::Create(
4728         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4729         C.getPointerType(C.getPointerType(E->getType()))
4730             .withConst()
4731             .withRestrict(),
4732         ImplicitParamDecl::Other));
4733     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4734     PrivateVarsPos[VD] = Counter;
4735     ++Counter;
4736   }
4737   const auto &TaskPrivatesMapFnInfo =
4738       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4739   llvm::FunctionType *TaskPrivatesMapTy =
4740       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
4741   std::string Name =
4742       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
4743   auto *TaskPrivatesMap = llvm::Function::Create(
4744       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
4745       &CGM.getModule());
4746   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
4747                                     TaskPrivatesMapFnInfo);
4748   if (CGM.getLangOpts().Optimize) {
4749     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
4750     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
4751     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
4752   }
4753   CodeGenFunction CGF(CGM);
4754   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
4755                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
4756 
4757   // *privi = &.privates.privi;
4758   LValue Base = CGF.EmitLoadOfPointerLValue(
4759       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
4760       TaskPrivatesArg.getType()->castAs<PointerType>());
4761   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
4762   Counter = 0;
4763   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
4764     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
4765     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
4766     LValue RefLVal =
4767         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
4768     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
4769         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
4770     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
4771     ++Counter;
4772   }
4773   CGF.FinishFunction();
4774   return TaskPrivatesMap;
4775 }
4776 
/// Emit initialization for private variables in task-based directives.
/// \param KmpTaskSharedsPtr Address of the task's 'shareds' block; may be
///        Address::invalid() when there is nothing to copy from.
/// \param TDBase LValue of the kmp_task_t_with_privates record being filled.
/// \param KmpTaskTWithPrivatesQTyRD Record whose second field is the
///        '.privates.' record.
/// \param Privates Privatized variables, in the same order as the fields of
///        the '.privates.' record.
/// \param ForDup true when emitting the body of the task_dup function (for
///        taskloops), false when emitting the initial task setup.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The second field of kmp_task_t_with_privates is the '.privates.' record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    // View the raw 'shareds' pointer as the shareds record type so the
    // captured fields can be addressed below.
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Walk the fields of the '.privates.' record in lock-step with Privates.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the dup function (ForDup) only non-trivial constructor-based
    // initializers are emitted; otherwise any initializer is emitted.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // PrivateElemInit is set for firstprivates: copy/construct from the
      // shared original instead of default-initializing.
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Source is the captured field inside the source task's shareds.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else {
          // Initial task setup: evaluate the original reference directly
          // inside an inlined region.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false);
          SharedRefLValue =  CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Scalar/class firstprivate: privatize Elem to alias the shared
          // source, then run the initializer into the private field.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private/lastprivate copy: just run its initializer.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
4888 
4889 /// Check if duplication function is required for taskloops.
4890 static bool checkInitIsRequired(CodeGenFunction &CGF,
4891                                 ArrayRef<PrivateDataTy> Privates) {
4892   bool InitRequired = false;
4893   for (const PrivateDataTy &Pair : Privates) {
4894     const VarDecl *VD = Pair.second.PrivateCopy;
4895     const Expr *Init = VD->getAnyInitializer();
4896     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
4897                                     !CGF.isTrivialInitializer(Init));
4898     if (InitRequired)
4899       break;
4900   }
4901   return InitRequired;
4902 }
4903 
4904 
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
/// \param WithLastIter If true, also store the 'lastpriv' argument into the
///        destination task's last-iteration field.
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Parameters: (task_dst, task_src, int lastpriv).
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    // Firstprivates are copied from the *source* task, so read its 'shareds'
    // pointer.
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
4983 
4984 /// Checks if destructor function is required to be generated.
4985 /// \return true if cleanups are required, false otherwise.
4986 static bool
4987 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
4988   bool NeedsCleanup = false;
4989   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4990   const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
4991   for (const FieldDecl *FD : PrivateRD->fields()) {
4992     NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
4993     if (NeedsCleanup)
4994       break;
4995   }
4996   return NeedsCleanup;
4997 }
4998 
/// Allocates and initializes a kmp_task_t object for a task-based directive:
/// collects and sorts the privatized variables, builds the
/// kmp_task_t_with_privates record, emits the proxy task entry, calls
/// __kmpc_omp_task_alloc (or __kmpc_omp_target_task_alloc for nowait target
/// tasks), copies the shareds, initializes the privates and registers the
/// task_dup / destructor helpers as needed.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally carry their element-init variable.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Descending alignment order; the order must match the field order of the
  // '.privates.' record created below.
  llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
    return L.first > R.first;
  });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloops use an extended
  // record, cached separately from the plain task record.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates-map parameter type is taken from TaskFunction's 4th
  // parameter.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    // No privates: pass a null privates-map pointer.
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // The 'final' clause may be a runtime expression (pointer set) or a known
  // constant (int part of the PointerIntPair).
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_target_task_alloc), AllocArgs);
  } else {
    NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops also need a task_dup helper when lastprivates exist or some
    // private copy has a non-trivial constructor-based initializer.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
5240 
namespace {
/// Dependence kind for RTL.
/// NOTE(review): values presumably mirror the kmp_depend_info flag encoding
/// in the OpenMP runtime's kmp.h — confirm before changing.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Fields ids in kmp_depend_info record.
/// Order matches the fields built in getDependTypes: base_addr, len, flags.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace
5251 
5252 /// Translates internal dependency kind into the runtime kind.
5253 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
5254   RTLDependenceKindTy DepKind;
5255   switch (K) {
5256   case OMPC_DEPEND_in:
5257     DepKind = DepIn;
5258     break;
5259   // Out and InOut dependencies must use the same code.
5260   case OMPC_DEPEND_out:
5261   case OMPC_DEPEND_inout:
5262     DepKind = DepInOut;
5263     break;
5264   case OMPC_DEPEND_mutexinoutset:
5265     DepKind = DepMutexInOutSet;
5266     break;
5267   case OMPC_DEPEND_source:
5268   case OMPC_DEPEND_sink:
5269   case OMPC_DEPEND_depobj:
5270   case OMPC_DEPEND_unknown:
5271     llvm_unreachable("Unknown task dependence type");
5272   }
5273   return DepKind;
5274 }
5275 
5276 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
5277 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
5278                            QualType &FlagsTy) {
5279   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
5280   if (KmpDependInfoTy.isNull()) {
5281     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
5282     KmpDependInfoRD->startDefinition();
5283     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
5284     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
5285     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
5286     KmpDependInfoRD->completeDefinition();
5287     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
5288   }
5289 }
5290 
// Returns the number of dependencies stored in a depobj together with an
// LValue for the first kmp_depend_info element. The count is read from the
// base_addr field of the record placed immediately *before* the dependency
// array (see the GEP with index -1 below).
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  // Load the void* stored in the depobj variable.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  // Reinterpret it as a kmp_depend_info* pointing at the first element.
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // Step one element back to the header record that stores the count.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
      Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
5319 
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// RAII helper: the constructor privatizes the iterator variables and emits
/// the loop headers (one nested loop per iterator); code emitted while the
/// scope is alive becomes the innermost loop body; the destructor emits the
/// counter increments, back-edges, and exit blocks in reverse order.
/// Passing a null OMPIteratorExpr makes the scope a no-op.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // Per-iterator continue/exit jump destinations, indexed like the
  // iterators; consumed in reverse by the destructor.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    // First pass: evaluate upper bounds and privatize the iterator and
    // counter variables before any loop code is emitted.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    // Second pass: emit one loop header per iterator, nesting inward.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Signedness of the comparison follows the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  // Close the loops innermost-first, mirroring the constructor.
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
5398 
// Fills kmp_depend_info records for every dependence expression in \p Data
// into \p DependenciesArray, starting at position \p Pos.
//
// \p Pos is either a compile-time counter (unsigned*, advanced by one per
// record) or a runtime counter lvalue (LValue*, loaded/incremented in IR) —
// the latter is needed when the number of records is only known at runtime
// (iterator-expanded dependences). If Data.IteratorExpr is set, the stores
// are wrapped in the iterator loop nest so each DepExpr is emitted once per
// iteration.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // No-op scope when there is no iterator expression.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
    // Compute the base address of the dependence item.
    llvm::Value *Addr;
    if (OASE) {
      const Expr *Base = OASE->getBase();
      Addr = CGF.EmitScalarExpr(Base);
    } else {
      Addr = CGF.EmitLValue(E).getPointer(CGF);
    }
    // Compute the size in bytes of the dependence item.
    llvm::Value *Size;
    QualType Ty = E->getType();
    if (OASE) {
      // Array shaping: element size times the product of all dimensions.
      Size = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OASE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
    } else if (const auto *ASE =
                   dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
      // Array section: size = (one-past-upper-bound) - lower-bound address.
      LValue UpAddrLVal =
          CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
      llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
          UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
      llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGM.SizeTy);
      llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
      Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
    } else {
      Size = CGF.getTypeSize(Ty);
    }
    // Address the destination record: constant index for unsigned* counter,
    // runtime GEP for LValue* counter.
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
                  DependenciesArray.getAlignment()),
          KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the position counter (statically or in emitted IR).
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
5485 
// Emits, for each depobj dependence expression in \p Data, a runtime value
// holding the number of kmp_depend_info elements stored in that depobj
// (read from the hidden header record at index -1). Accumulation happens
// in temporaries inside the iterator loop nest; the final loads are emitted
// after the iterator scope closes so the returned values dominate later
// uses.
static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  {
    // Wrap the per-depobj counting in the iterator loop nest, if any.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      // Load the depobj's void* and view it as kmp_depend_info*.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      // The element count lives in the record one element before the data.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      // Accumulate the count in a zero-initialized temporary.
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.InitTempAlloca(NumLVal.getAddress(CGF),
                         llvm::ConstantInt::get(CGF.IntPtrTy, 0));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // Load the accumulated totals outside the iterator loops.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}
5543 
// Copies the kmp_depend_info records stored in each depobj of \p Data into
// \p DependenciesArray with memcpy, starting at the runtime position held
// in \p PosLVal and advancing it by the number of records copied.
static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    // Wrap the copies in the iterator loop nest, if any.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      // Load the depobj's void* and view it as kmp_depend_info*.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // memcopy dependency data.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr =
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
                  DependenciesArray.getAlignment());
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += size;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}
5604 
// Emits the combined dependence array for a task: a (possibly
// variable-length) array of kmp_depend_info filled in three passes —
// (1) plain dependences at statically known positions, (2) iterator-expanded
// dependences at a runtime position, (3) contents of depobj dependences.
// Returns {number of elements (i32), array address cast to void*}, or
// {nullptr, invalid} when there are no dependences.
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  // Statically countable dependences only: depobj and iterator-expanded
  // entries contribute runtime counts instead.
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  // Starts at 1: the iteration counts are multiplied in below.
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 1);
  // Calculate number of depobj dependecies and regular deps with the iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.
    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        NumOfRegularWithIterators =
            CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
      }
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    // Total size is only known at runtime: emit a VLA for the array.
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    // Wrap the computed count in an OpaqueValueExpr so it can serve as the
    // VLA size expression.
    OpaqueValueExpr OVE(Loc,
                        C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
                        VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamDecl::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    // Fully static count: plain constant-sized stack array.
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  // Pass 1: regular dependences without iterators, static positions.
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Copy regular dependecies with iterators.
  // Pass 2: positions continue at runtime from where pass 1 stopped.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy);
  return std::make_pair(NumOfElements, DependenciesArray);
}
5725 
// Emits the dependence array for an 'omp depobj' construct. The array is
// heap-allocated via __kmpc_alloc with one extra leading kmp_depend_info
// record whose base_addr field stores the element count (consumed later by
// getDepobjElements / emitDestroyClause / emitUpdateClause). Returns the
// address of the first real element (one past the header), cast to void*.
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // Iterator case: element count = product of all iterator upper bounds,
    // computed at runtime.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    // +1 for the header element storing the count.
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Static case: count known at compile time; +1 for the header element.
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_alloc), Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address(Addr, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // Filling starts at index 1, after the header element: use a runtime
  // counter when an iterator expands the entries, otherwise a static one.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return the address of the first real element, past the header.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
  return DependenciesArray;
}
5806 
// Emits 'omp depobj ... destroy': frees the heap array behind the depobj
// via __kmpc_free. The stored pointer points one element past the hidden
// header record, so it is stepped back by one before freeing to recover the
// original allocation address.
void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  // Load the void* stored in the depobj and view it as kmp_depend_info*.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  // Step back over the header element to the allocation start.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // _kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_free), Args);
}
5831 
// Emits 'omp depobj ... update(kind)': iterates over every kmp_depend_info
// record in the depobj and rewrites its flags field to the new dependence
// kind. The loop is emitted by hand as a PHI-based do-while over the
// element pointers.
// NOTE(review): with NumDeps == 0 the body would execute once before the
// End comparison — presumably the runtime guarantees at least one element;
// confirm against emitDepobjDependClause.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  // Fetch element count and the first element from the depobj.
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // Current element pointer: Begin on entry, advanced copy on the back-edge.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Address(ElementPHI, Begin.getAlignment());
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5877 
5878 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
5879                                    const OMPExecutableDirective &D,
5880                                    llvm::Function *TaskFunction,
5881                                    QualType SharedsTy, Address Shareds,
5882                                    const Expr *IfCond,
5883                                    const OMPTaskDataTy &Data) {
5884   if (!CGF.HaveInsertPoint())
5885     return;
5886 
5887   TaskResultTy Result =
5888       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5889   llvm::Value *NewTask = Result.NewTask;
5890   llvm::Function *TaskEntry = Result.TaskEntry;
5891   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
5892   LValue TDBase = Result.TDBase;
5893   const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
5894   // Process list of dependences.
5895   Address DependenciesArray = Address::invalid();
5896   llvm::Value *NumOfElements;
5897   std::tie(NumOfElements, DependenciesArray) =
5898       emitDependClause(CGF, Data.Dependences, Loc);
5899 
5900   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5901   // libcall.
5902   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5903   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5904   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
5905   // list is not empty
5906   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5907   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5908   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5909   llvm::Value *DepTaskArgs[7];
5910   if (!Data.Dependences.empty()) {
5911     DepTaskArgs[0] = UpLoc;
5912     DepTaskArgs[1] = ThreadID;
5913     DepTaskArgs[2] = NewTask;
5914     DepTaskArgs[3] = NumOfElements;
5915     DepTaskArgs[4] = DependenciesArray.getPointer();
5916     DepTaskArgs[5] = CGF.Builder.getInt32(0);
5917     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5918   }
5919   auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
5920                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5921     if (!Data.Tied) {
5922       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5923       LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5924       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5925     }
5926     if (!Data.Dependences.empty()) {
5927       CGF.EmitRuntimeCall(
5928           createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
5929     } else {
5930       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
5931                           TaskArgs);
5932     }
5933     // Check if parent region is untied and build return for untied task;
5934     if (auto *Region =
5935             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5936       Region->emitUntiedSwitch(CGF);
5937   };
5938 
5939   llvm::Value *DepWaitTaskArgs[6];
5940   if (!Data.Dependences.empty()) {
5941     DepWaitTaskArgs[0] = UpLoc;
5942     DepWaitTaskArgs[1] = ThreadID;
5943     DepWaitTaskArgs[2] = NumOfElements;
5944     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5945     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5946     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5947   }
5948   auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
5949                         &Data, &DepWaitTaskArgs,
5950                         Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5951     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5952     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5953     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5954     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5955     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5956     // is specified.
5957     if (!Data.Dependences.empty())
5958       CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
5959                           DepWaitTaskArgs);
5960     // Call proxy_task_entry(gtid, new_task);
5961     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
5962                       Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
5963       Action.Enter(CGF);
5964       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
5965       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
5966                                                           OutlinedFnArgs);
5967     };
5968 
5969     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
5970     // kmp_task_t *new_task);
5971     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
5972     // kmp_task_t *new_task);
5973     RegionCodeGenTy RCG(CodeGen);
5974     CommonActionTy Action(
5975         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
5976         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
5977     RCG.setAction(Action);
5978     RCG(CGF);
5979   };
5980 
5981   if (IfCond) {
5982     emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
5983   } else {
5984     RegionCodeGenTy ThenRCG(ThenCodeGen);
5985     ThenRCG(CGF);
5986   }
5987 }
5988 
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  // Create the kmp_task_t object and the outlined proxy entry for the loop.
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // 'if' clause: evaluate the condition at runtime, or default to 1 (true)
  // when no clause was specified.
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the lower-bound field of the task descriptor from the
  // captured lower-bound variable's initializer.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Likewise for the upper-bound field.
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Likewise for the stride field.
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    // No reduction data: zero-initialize the field.
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // Encoding of the 'sched' argument expected by __kmpc_taskloop.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      // Data.Schedule: the pointer holds the grainsize/num_tasks value and the
      // int discriminates num_tasks (true) vs grainsize (false).
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      // Optional task-duplication routine, or null when none was generated.
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
}
6072 
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr, EExpr, UpExpr Optional expressions forwarded verbatim to
/// \p RedOpGen for every element (used by the atomic reduction generator;
/// null by default).
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely when the array is empty (begin == end).
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes carry the current source/destination element pointers across
  // iterations of the loop.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Temporarily remap LHSVar/RHSVar to the current elements so the
  // expressions emitted by RedOpGen address them.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
6152 
6153 /// Emit reduction combiner. If the combiner is a simple expression emit it as
6154 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
6155 /// UDR combiner function.
6156 static void emitReductionCombiner(CodeGenFunction &CGF,
6157                                   const Expr *ReductionOp) {
6158   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
6159     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
6160       if (const auto *DRE =
6161               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
6162         if (const auto *DRD =
6163                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
6164           std::pair<llvm::Function *, llvm::Function *> Reduction =
6165               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
6166           RValue Func = RValue::get(Reduction.first);
6167           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
6168           CGF.EmitIgnoredExpr(ReductionOp);
6169           return;
6170         }
6171   CGF.EmitIgnoredExpr(ReductionOp);
6172 }
6173 
/// Emits the function the runtime uses to combine two per-thread lists of
/// reduction items:
/// \code
/// void reduction_func(void *LHSArg, void *RHSArg);
/// \endcode
/// Both arguments point to arrays of void* addressing the individual
/// reduction items; variably-modified items carry an extra trailing slot
/// holding their element count.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Remap each LHS/RHS variable onto the corresponding slot of the argument
  // arrays so the reduction-op expressions address the passed-in items.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // VLA items use an extra slot in the list holding the element count;
      // bind it to the VLA's opaque size expression.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // Emit the combiner for each item; arrays are combined element-wise.
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
6265 
6266 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
6267                                                   const Expr *ReductionOp,
6268                                                   const Expr *PrivateRef,
6269                                                   const DeclRefExpr *LHS,
6270                                                   const DeclRefExpr *RHS) {
6271   if (PrivateRef->getType()->isArrayType()) {
6272     // Emit reduction for array section.
6273     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
6274     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
6275     EmitOMPAggregateReduction(
6276         CGF, PrivateRef->getType(), LHSVar, RHSVar,
6277         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
6278           emitReductionCombiner(CGF, ReductionOp);
6279         });
6280   } else {
6281     // Emit reduction for array subscript or single variable.
6282     emitReductionCombiner(CGF, ReductionOp);
6283   }
6284 }
6285 
/// Emits the code that combines the thread-private reduction copies back into
/// the original variables; see the scheme in the comment block below.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // Inline the combiners directly; no runtime calls are emitted.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      // VLA items get an extra slot carrying the element count as a
      // pointer-sized integer.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
                                       : OMPRTL__kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  // Non-atomic combine of every private copy into the original variable.
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  // Pair the combiner region with the trailing __kmpc_end_reduce{_nowait}
  // call.
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
                                       : OMPRTL__kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  // Try to recognize each reduction op as an 'x = x op e' update so it can be
  // emitted as a native atomic; otherwise fall back to a critical region.
  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Evaluate the update expression with the LHS variable
                // remapped to a temporary holding the loaded atomic value.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          createRuntimeFunction(OMPRTL__kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
6589 
6590 /// Generates unique name for artificial threadprivate variables.
6591 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
6592 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
6593                                       const Expr *Ref) {
6594   SmallString<256> Buffer;
6595   llvm::raw_svector_ostream Out(Buffer);
6596   const clang::DeclRefExpr *DE;
6597   const VarDecl *D = ::getBaseDecl(Ref, DE);
6598   if (!D)
6599     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
6600   D = D->getCanonicalDecl();
6601   std::string Name = CGM.getOpenMPRuntime().getName(
6602       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
6603   Out << Prefix << Name << "_"
6604       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
6605   return std::string(Out.str());
6606 }
6607 
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
///
/// \param Loc Location used for the generated function and its body.
/// \param RCG Reduction codegen helper describing all reduction items.
/// \param N Index of the reduction item this initializer is emitted for.
/// \returns The newly created internal function.
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // Both parameters are passed as 'void * restrict'.
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  // %arg: the (private) reduction item to initialize.
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  // %orig: the original reduction item (used only by 'declare reduction'
  // initializers that reference omp_orig).
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue OrigLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // The original item is not referenced; pass a null pointer lvalue.
    OrigLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
6676 
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
///
/// \param RCG Reduction codegen helper describing all reduction items.
/// \param N Index of the reduction item this combiner is emitted for.
/// \param ReductionOp Expression performing the actual combination.
/// \param LHS,RHS DeclRefExprs for the lhs/rhs placeholder variables used
///        inside \p ReductionOp; they are remapped to the function arguments.
/// \param PrivateRef Reference to the private copy of the reduction item.
/// \returns The newly created internal function.
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  // %arg0: in/out item (combination result is stored back into it).
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  // %arg1: incoming item to combine with.
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
6754 
/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
///
/// \param RCG Reduction codegen helper describing all reduction items.
/// \param N Index of the reduction item this finalizer is emitted for.
/// \returns The newly created internal function, or nullptr if item \p N
///          requires no cleanups (no finalizer needed).
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // Trivially destructible items do not need a finalizer at all.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // %arg: the reduction item to destroy.
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}
6803 
/// Emits task-reduction initialization: builds an array of
/// kmp_taskred_input_t descriptors (one per reduction item, each carrying
/// pointers to the generated init/comb/fini helper functions) and calls
/// either __kmpc_taskred_modifier_init or __kmpc_taskred_init.
/// Returns the taskgroup descriptor produced by the runtime call, or nullptr
/// if there is no insert point or no reduction items.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  // Fill one descriptor per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    // A null fini pointer tells the runtime no cleanup is required.
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      // Flag value 1 requests lazy allocation by the runtime (VLA/section).
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__kmpc_taskred_modifier_init), Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskred_init),
                             Args);
}
6929 
6930 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6931                                             SourceLocation Loc,
6932                                             bool IsWorksharingReduction) {
6933   // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6934   // is_ws, int num, void *data);
6935   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6936   llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6937                                                 CGM.IntTy, /*isSigned=*/true);
6938   llvm::Value *Args[] = {IdentTLoc, GTid,
6939                          llvm::ConstantInt::get(CGM.IntTy,
6940                                                 IsWorksharingReduction ? 1 : 0,
6941                                                 /*isSigned=*/true)};
6942   (void)CGF.EmitRuntimeCall(
6943       createRuntimeFunction(OMPRTL__kmpc_task_reduction_modifier_fini), Args);
6944 }
6945 
6946 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6947                                               SourceLocation Loc,
6948                                               ReductionCodeGen &RCG,
6949                                               unsigned N) {
6950   auto Sizes = RCG.getSizes(N);
6951   // Emit threadprivate global variable if the type is non-constant
6952   // (Sizes.second = nullptr).
6953   if (Sizes.second) {
6954     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6955                                                      /*isSigned=*/false);
6956     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6957         CGF, CGM.getContext().getSizeType(),
6958         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6959     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6960   }
6961 }
6962 
6963 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6964                                               SourceLocation Loc,
6965                                               llvm::Value *ReductionsPtr,
6966                                               LValue SharedLVal) {
6967   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6968   // *d);
6969   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6970                                                    CGM.IntTy,
6971                                                    /*isSigned=*/true),
6972                          ReductionsPtr,
6973                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6974                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6975   return Address(
6976       CGF.EmitRuntimeCall(
6977           createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
6978       SharedLVal.getAlignment());
6979 }
6980 
6981 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6982                                        SourceLocation Loc) {
6983   if (!CGF.HaveInsertPoint())
6984     return;
6985 
6986   llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
6987   if (OMPBuilder) {
6988     OMPBuilder->CreateTaskwait(CGF.Builder);
6989   } else {
6990     // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6991     // global_tid);
6992     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6993     // Ignore return result until untied tasks are supported.
6994     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
6995   }
6996 
6997   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6998     Region->emitUntiedSwitch(CGF);
6999 }
7000 
7001 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
7002                                            OpenMPDirectiveKind InnerKind,
7003                                            const RegionCodeGenTy &CodeGen,
7004                                            bool HasCancel) {
7005   if (!CGF.HaveInsertPoint())
7006     return;
7007   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
7008   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
7009 }
7010 
namespace {
/// Kind of the cancelled region, passed as the 'cncl_kind' argument to the
/// __kmpc_cancel / __kmpc_cancellationpoint runtime calls. Values presumably
/// mirror the libomp runtime's encoding -- keep them in sync.
enum RTCancelKind {
  CancelNoreq = 0,     // No cancellation region.
  CancelParallel = 1,  // 'parallel' region.
  CancelLoop = 2,      // Worksharing loop ('for') region.
  CancelSections = 3,  // 'sections' region.
  CancelTaskgroup = 4  // 'taskgroup' region.
};
} // anonymous namespace
7020 
7021 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
7022   RTCancelKind CancelKind = CancelNoreq;
7023   if (CancelRegion == OMPD_parallel)
7024     CancelKind = CancelParallel;
7025   else if (CancelRegion == OMPD_for)
7026     CancelKind = CancelLoop;
7027   else if (CancelRegion == OMPD_sections)
7028     CancelKind = CancelSections;
7029   else {
7030     assert(CancelRegion == OMPD_taskgroup);
7031     CancelKind = CancelTaskgroup;
7032   }
7033   return CancelKind;
7034 }
7035 
/// Emits a call to __kmpc_cancellationpoint and, if cancellation was
/// requested, a branch out of the construct through the cleanup path.
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      // Non-zero result means cancellation was requested.
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct; branch through cleanups so destructors and
      // region finalization still run.
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
7070 
/// Emits a call to __kmpc_cancel (optionally guarded by an 'if' clause
/// condition) and, if cancellation was granted, a branch out of the construct
/// through the cleanup path.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // The actual call is emitted from this lambda so it can also be used as
    // the 'then' branch of an 'if' clause.
    auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
                                                        PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      // Non-zero result means cancellation was granted.
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct; branch through cleanups so destructors and
      // region finalization still run.
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // Emit the cancel only when the 'if' clause condition is true.
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
7112 
7113 namespace {
7114 /// Cleanup action for uses_allocators support.
7115 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
7116   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
7117 
7118 public:
7119   OMPUsesAllocatorsActionTy(
7120       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
7121       : Allocators(Allocators) {}
7122   void Enter(CodeGenFunction &CGF) override {
7123     if (!CGF.HaveInsertPoint())
7124       return;
7125     for (const auto &AllocatorData : Allocators) {
7126       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
7127           CGF, AllocatorData.first, AllocatorData.second);
7128     }
7129   }
7130   void Exit(CodeGenFunction &CGF) override {
7131     if (!CGF.HaveInsertPoint())
7132       return;
7133     for (const auto &AllocatorData : Allocators) {
7134       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
7135                                                         AllocatorData.first);
7136     }
7137   }
7138 };
7139 } // namespace
7140 
7141 void CGOpenMPRuntime::emitTargetOutlinedFunction(
7142     const OMPExecutableDirective &D, StringRef ParentName,
7143     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
7144     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
7145   assert(!ParentName.empty() && "Invalid target region parent name!");
7146   HasEmittedTargetRegion = true;
7147   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
7148   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
7149     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
7150       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
7151       if (!D.AllocatorTraits)
7152         continue;
7153       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
7154     }
7155   }
7156   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
7157   CodeGen.setAction(UsesAllocatorAction);
7158   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
7159                                    IsOffloadEntry, CodeGen);
7160 }
7161 
/// Emits initialization of a single uses_allocators allocator: calls
/// __kmpc_init_allocator with the traits array and stores the returned
/// handle into the allocator variable.
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // Number of traits is the constant array bound of the traits expression.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  // Pass the traits array as a void** to the runtime.
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  // Convert the void* handle returned by the runtime to the allocator
  // variable's declared type before storing it.
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}
7195 
7196 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
7197                                              const Expr *Allocator) {
7198   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
7199   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
7200   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
7201   llvm::Value *AllocatorVal =
7202       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
7203   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
7204                                           CGF.getContext().VoidPtrTy,
7205                                           Allocator->getExprLoc());
7206   (void)CGF.EmitRuntimeCall(
7207       createRuntimeFunction(OMPRTL__kmpc_destroy_allocator),
7208       {ThreadId, AllocatorVal});
7209 }
7210 
/// Outlines the target region body into a uniquely-named entry function and,
/// when it is an offload entry, creates the region ID and registers the entry
/// with the offload-entries manager.
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Outline the region body with the entry name chosen above.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also mark
  // the outlined function to have external linkage in case we are emitting code
  // for the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
  } else {
    // Host side: a private constant byte serves as the unique region ID.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}
7277 
7278 /// Checks if the expression is constant or does not have non-trivial function
7279 /// calls.
7280 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
7281   // We can skip constant expressions.
7282   // We can skip expressions with trivial calls or simple expressions.
7283   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
7284           !E->hasNonTrivialCall(Ctx)) &&
7285          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
7286 }
7287 
/// Strips captured statements and (possibly nested) compound statements from
/// \a Body and returns the single "significant" statement found, or nullptr
/// if the body contains more than one significant statement. Trivial
/// expressions, ignorable statements (asm, null statements, flush/barrier/
/// taskyield directives) and declaration statements with only trivial
/// declarations do not count as significant.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  // Keep descending while the current candidate is itself a compound
  // statement with a single significant child.
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        // Constant or side-effect-free expressions can be skipped.
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        // A DeclStmt is ignorable if every declaration in it is either
        // non-executable (types, pragmas, OpenMP metadata, ...) or a variable
        // with a trivial (or absent) initializer.
        if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->isConstexpr() ||
                     ((VD->getType().isTrivialType(Ctx) ||
                       VD->getType()->isReferenceType()) &&
                      (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
7332 
/// Emit the number of teams for a target directive.  Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit a team of size one for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': inspect the single directive nested inside the target
    // region, if there is one, looking for a teams construct.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          // Emit the nested teams directive's num_teams expression in the
          // context of the enclosing captured statement.
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        // Teams construct without num_teams: 0 lets the runtime decide.
        return Bld.getInt32(0);
      }
      // A nested parallel or simd region without teams uses a single team.
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      return Bld.getInt32(0);
    }
    // No nested directive found - the number of teams cannot be determined
    // here.
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target+teams: the num_teams clause, if any, is on D itself.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // These directives have no associated teams construct - one team.
    return Bld.getInt32(1);
  // All remaining directive kinds are not target-based and cannot reach here
  // (guarded by the assertion above).
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
7463 
/// If the single significant statement inside the captured statement \p CS is
/// a parallel directive, compute the number of threads it will run with,
/// honoring its 'if' and 'num_threads' clauses and clamping the result to
/// \p DefaultThreadLimitVal when that is non-null. Returns an i32 value of 1
/// for a nested simd directive, and falls back to \p DefaultThreadLimitVal
/// (or a literal 0 meaning "runtime default") when no suitable nested
/// directive is found.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        // Find the if clause that applies to 'parallel' (either unmodified or
        // with the 'parallel' directive-name modifier).
        const OMPIfClause *IfClause = nullptr;
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Statically false condition: the parallel region runs with a
            // single thread.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            // Dynamic condition: emit its pre-init declarations, then the
            // condition itself, and fold it in below.
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp to the default thread limit: min(limit, num_threads).
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    // A simd region executes with a single thread.
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}
7555 
/// Emit the number of threads for a target directive.  Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': derive the thread count from whatever directive is
    // nested inside the target region.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        // Emit the nested directive's thread_limit expression (with its
        // pre-init declarations) in the enclosing captured context.
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // For a teams (non-distribute) directive, descend one more level to
      // look for a nested parallel/distribute region.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // A nested plain 'distribute' may itself contain a parallel region.
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      // Find the if clause applying to 'parallel' (unmodified or with the
      // 'parallel' directive-name modifier).
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Statically false: single thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Combine: min(thread_limit, num_threads) when both are present.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    // simd regions execute with a single thread.
    return Bld.getInt32(1);
  // All remaining directive kinds are not target-based and cannot reach here
  // (guarded by the assertion above).
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7778 
7779 namespace {
7780 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7781 
7782 // Utility to handle information from clauses associated with a given
7783 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7784 // It provides a convenient interface to obtain the information and generate
7785 // code for that information.
7786 class MappableExprsHandler {
7787 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading. These must stay in sync with the flag values expected by
  /// the offloading runtime library.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class. The member position is encoded in these bits (see
    /// getFlagMemberOffset for the shift amount).
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7828 
7829   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7830   static unsigned getFlagMemberOffset() {
7831     unsigned Offset = 0;
7832     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7833          Remain = Remain >> 1)
7834       Offset++;
7835     return Offset;
7836   }
7837 
7838   /// Class that associates information with a base pointer to be passed to the
7839   /// runtime library.
7840   class BasePointerInfo {
7841     /// The base pointer.
7842     llvm::Value *Ptr = nullptr;
7843     /// The base declaration that refers to this device pointer, or null if
7844     /// there is none.
7845     const ValueDecl *DevPtrDecl = nullptr;
7846 
7847   public:
7848     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7849         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7850     llvm::Value *operator*() const { return Ptr; }
7851     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7852     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7853   };
7854 
  /// Convenience aliases for the parallel arrays passed to the offloading
  /// runtime (base pointers, pointers/sizes, and map-type flags).
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
7858 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Field index and address of the lowest mapped element of the struct.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Field index and address of the highest mapped element of the struct.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Base address of the struct being mapped.
    Address Base = Address::invalid();
  };
7870 
private:
  /// Information extracted from a map-like clause for one mappable
  /// expression: its component list, the map type and modifiers, whether the
  /// device pointer must be returned for it, and whether the map is implicit.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
  };
7889 
  /// If use_device_ptr is used on a pointer which is a struct member and there
  /// is no map information about it, then emission of that entry is deferred
  /// until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    /// The member expression the device pointer refers to.
    const Expr *IE = nullptr;
    /// The declaration of the device pointer.
    const ValueDecl *VD = nullptr;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
        : IE(IE), VD(VD) {}
  };
7900 
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;
7921 
7922   llvm::Value *getExprTypeSize(const Expr *E) const {
7923     QualType ExprTy = E->getType().getCanonicalType();
7924 
7925     // Calculate the size for array shaping expression.
7926     if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7927       llvm::Value *Size =
7928           CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7929       for (const Expr *SE : OAE->getDimensions()) {
7930         llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7931         Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7932                                       CGF.getContext().getSizeType(),
7933                                       SE->getExprLoc());
7934         Size = CGF.Builder.CreateNUWMul(Size, Sz);
7935       }
7936       return Size;
7937     }
7938 
7939     // Reference types are ignored for mapping purposes.
7940     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7941       ExprTy = RefTy->getPointeeType().getCanonicalType();
7942 
7943     // Given that an array section is considered a built-in type, we need to
7944     // do the calculation based on the length of the section instead of relying
7945     // on CGF.getTypeSize(E->getType()).
7946     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7947       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7948                             OAE->getBase()->IgnoreParenImpCasts())
7949                             .getCanonicalType();
7950 
7951       // If there is no length associated with the expression and lower bound is
7952       // not specified too, that means we are using the whole length of the
7953       // base.
7954       if (!OAE->getLength() && OAE->getColonLoc().isValid() &&
7955           !OAE->getLowerBound())
7956         return CGF.getTypeSize(BaseTy);
7957 
7958       llvm::Value *ElemSize;
7959       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7960         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7961       } else {
7962         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7963         assert(ATy && "Expecting array type if not a pointer type.");
7964         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7965       }
7966 
7967       // If we don't have a length at this point, that is because we have an
7968       // array section with a single element.
7969       if (!OAE->getLength() && OAE->getColonLoc().isInvalid())
7970         return ElemSize;
7971 
7972       if (const Expr *LenExpr = OAE->getLength()) {
7973         llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7974         LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7975                                              CGF.getContext().getSizeType(),
7976                                              LenExpr->getExprLoc());
7977         return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7978       }
7979       assert(!OAE->getLength() && OAE->getColonLoc().isValid() &&
7980              OAE->getLowerBound() && "expected array_section[lb:].");
7981       // Size = sizetype - lb * elemtype;
7982       llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7983       llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7984       LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7985                                        CGF.getContext().getSizeType(),
7986                                        OAE->getLowerBound()->getExprLoc());
7987       LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7988       llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7989       llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7990       LengthVal = CGF.Builder.CreateSelect(
7991           Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7992       return LengthVal;
7993     }
7994     return CGF.getTypeSize(ExprTy);
7995   }
7996 
7997   /// Return the corresponding bits for a given map clause modifier. Add
7998   /// a flag marking the map as a pointer if requested. Add a flag marking the
7999   /// map as the first one of a series of maps that relate to the same map
8000   /// expression.
8001   OpenMPOffloadMappingFlags getMapTypeBits(
8002       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
8003       bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
8004     OpenMPOffloadMappingFlags Bits =
8005         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
8006     switch (MapType) {
8007     case OMPC_MAP_alloc:
8008     case OMPC_MAP_release:
8009       // alloc and release is the default behavior in the runtime library,  i.e.
8010       // if we don't pass any bits alloc/release that is what the runtime is
8011       // going to do. Therefore, we don't need to signal anything for these two
8012       // type modifiers.
8013       break;
8014     case OMPC_MAP_to:
8015       Bits |= OMP_MAP_TO;
8016       break;
8017     case OMPC_MAP_from:
8018       Bits |= OMP_MAP_FROM;
8019       break;
8020     case OMPC_MAP_tofrom:
8021       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
8022       break;
8023     case OMPC_MAP_delete:
8024       Bits |= OMP_MAP_DELETE;
8025       break;
8026     case OMPC_MAP_unknown:
8027       llvm_unreachable("Unexpected map type!");
8028     }
8029     if (AddPtrFlag)
8030       Bits |= OMP_MAP_PTR_AND_OBJ;
8031     if (AddIsTargetParamFlag)
8032       Bits |= OMP_MAP_TARGET_PARAM;
8033     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
8034         != MapModifiers.end())
8035       Bits |= OMP_MAP_ALWAYS;
8036     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
8037         != MapModifiers.end())
8038       Bits |= OMP_MAP_CLOSE;
8039     return Bits;
8040   }
8041 
8042   /// Return true if the provided expression is a final array section. A
8043   /// final array section, is one whose length can't be proved to be one.
8044   bool isFinalArraySectionExpression(const Expr *E) const {
8045     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
8046 
8047     // It is not an array section and therefore not a unity-size one.
8048     if (!OASE)
8049       return false;
8050 
8051     // An array section with no colon always refer to a single element.
8052     if (OASE->getColonLoc().isInvalid())
8053       return false;
8054 
8055     const Expr *Length = OASE->getLength();
8056 
8057     // If we don't have a length we have to check if the array has size 1
8058     // for this dimension. Also, we should always expect a length if the
8059     // base type is pointer.
8060     if (!Length) {
8061       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
8062                              OASE->getBase()->IgnoreParenImpCasts())
8063                              .getCanonicalType();
8064       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
8065         return ATy->getSize().getSExtValue() != 1;
8066       // If we don't have a constant dimension length, we have to consider
8067       // the current section as having any size, so it is not necessarily
8068       // unitary. If it happen to be unity size, that's user fault.
8069       return true;
8070     }
8071 
8072     // Check if the length evaluates to 1.
8073     Expr::EvalResult Result;
8074     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
8075       return true; // Can have more that size 1.
8076 
8077     llvm::APSInt ConstLength = Result.Val.getInt();
8078     return ConstLength.getSExtValue() != 1;
8079   }
8080 
  /// Generate the base pointers, section pointers, sizes and map type
  /// bits for the provided map type, map modifier, and expression components.
  /// \a IsFirstComponent should be set to true if the provided set of
  /// components is the first associated with a capture.
  ///
  /// \param Components Component list for one mapped expression, ordered from
  /// the full expression down to its base declaration (it is walked here in
  /// reverse, base first).
  /// \param BasePointers,Pointers,Sizes,Types Output arrays; one entry is
  /// appended per emitted map.
  /// \param PartialStruct Updated with base/lowest/highest member info when a
  /// struct is only partially mapped, so the caller can emit the combined
  /// entry.
  /// \param OverlappedElements Component lists of members that overlap this
  /// one; when non-empty, only the non-overlapped byte ranges are emitted.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType,
      ArrayRef<OpenMPMapModifierKind> MapModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
      StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
      bool IsImplicit,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = llvm::None) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (**), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    // When the variable is on a declare target link or in a to clause with
    // unified memory, a reference is needed to hold the host/device address
    // of the variable.
    bool RequiresReference = false;

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    Address BP = Address::invalid();
    // Classify the base component to decide how BP (the base pointer of the
    // first emitted entry) must be computed.
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
    const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);

    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
    } else if (OAShE &&
               isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
      BP = Address(
          CGF.EmitScalarExpr(OAShE->getBase()),
          CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
            RequiresReference = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
          }
        }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());

        // We do not need to generate individual map information for the
        // pointer, it can be associated with the combined storage.
        ++I;
      }
    }

    // Track whether a component of the list should be marked as MEMBER_OF some
    // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
    // in a component list should be marked as MEMBER_OF, all subsequent entries
    // do not belong to the base struct. E.g.
    // struct S2 s;
    // s.ps->ps->ps->f[:]
    //   (1) (2) (3) (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
    // is the pointee of ps(2) which is not member of struct s, so it should not
    // be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a component
    // in the component list which is a member expression. Useful when we have a
    // pointer or a final array section, in which case it is the previous
    // component in the list which tells us whether we have a member expression.
    // E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells us
    // whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    for (; I != CE; ++I) {
      // If the current component is member of a struct (parent struct) mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME)
          ShouldBeMemberOf = true;
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section, is one whose length can't be proved to be one.
      bool IsFinalArraySection =
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      const auto *OAShE =
          dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
      const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
      const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
      bool IsPointer =
          OAShE ||
          (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();
      bool IsNonDerefPointer = IsPointer && !UO && !BO;

      if (Next == CE || IsNonDerefPointer || IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
                isa<UnaryOperator>(Next->getAssociatedExpression()) ||
                isa<BinaryOperator>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        // LB is the lowest address of the storage being emitted for this
        // component.
        Address LB = Address::invalid();
        if (OAShE) {
          LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
                       CGF.getContext().getTypeAlignInChars(
                           OAShE->getBase()->getType()));
        } else {
          LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
                   .getAddress(CGF);
        }

        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the object
        // it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointer =
            IsPointer && EncounteredME &&
            (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
             EncounteredME);
        if (!OverlappedElements.empty()) {
          // Handle base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(Next == CE &&
                 "Expected last element for the overlapped elements.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on the
          // device.
          PartialStruct.LowestElem = {0, LB};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
                                                              CGF.VoidPtrTy),
              TypeSize.getQuantity() - 1);
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          // Emit data for non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapModifiers, IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false);
          LB = BP;
          llvm::Value *Size = nullptr;
          // Do bitcopy of all non-overlapped structure elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (MC.getAssociatedDeclaration()) {
                ComponentLB =
                    CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                        .getAddress(CGF);
                Size = CGF.Builder.CreatePtrDiff(
                    CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
                    CGF.EmitCastToVoidPtr(LB.getPointer()));
                break;
              }
            }
            // Emit the gap [LB, ComponentLB) preceding this overlapped
            // element, then continue past it.
            BasePointers.push_back(BP.getPointer());
            Pointers.push_back(LB.getPointer());
            Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty,
                                                      /*isSigned=*/true));
            Types.push_back(Flags);
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
          }
          // Emit the trailing range after the last overlapped element.
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Size = CGF.Builder.CreatePtrDiff(
              CGF.EmitCastToVoidPtr(
                  CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
              CGF.EmitCastToVoidPtr(LB.getPointer()));
          Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          Types.push_back(Flags);
          // The overlapped case always handles the last component, so we are
          // done with this component list.
          break;
        }
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        if (!IsMemberPointer) {
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags = getMapTypeBits(
              MapType, MapModifiers, IsImplicit,
              !IsExpressionFirstInfo || RequiresReference,
              IsCaptureFirstInfo && !RequiresReference);

          if (!IsExpressionFirstInfo) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
            // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
            if (IsPointer)
              Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
                         OMP_MAP_DELETE | OMP_MAP_CLOSE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
              // should be later updated with the correct value of MEMBER_OF.
              Flags |= OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          Types.push_back(Flags);
        }

        // If we have encountered a member expression so far, keep track of the
        // mapped member. If the parent is "*this", then the value declaration
        // is nullptr.
        if (EncounteredME) {
          const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this struct
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LB};
            PartialStruct.HighestElem = {FieldIndex, LB};
            PartialStruct.Base = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LB};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            PartialStruct.HighestElem = {FieldIndex, LB};
          }
        }

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
      }
    }
  }
8538 
8539   /// Return the adjusted map modifiers if the declaration a capture refers to
8540   /// appears in a first-private clause. This is expected to be used only with
8541   /// directives that start with 'target'.
8542   MappableExprsHandler::OpenMPOffloadMappingFlags
8543   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8544     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8545 
8546     // A first private variable captured by reference will use only the
8547     // 'private ptr' and 'map to' flag. Return the right flags if the captured
8548     // declaration is known as first-private in this handler.
8549     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8550       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
8551           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
8552         return MappableExprsHandler::OMP_MAP_ALWAYS |
8553                MappableExprsHandler::OMP_MAP_TO;
8554       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8555         return MappableExprsHandler::OMP_MAP_TO |
8556                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8557       return MappableExprsHandler::OMP_MAP_PRIVATE |
8558              MappableExprsHandler::OMP_MAP_TO;
8559     }
8560     return MappableExprsHandler::OMP_MAP_TO |
8561            MappableExprsHandler::OMP_MAP_FROM;
8562   }
8563 
  /// Return the MEMBER_OF flag for the entry at the given position in the
  /// list of map arguments.
  static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
    // Shift left by getFlagMemberOffset() bits so that (Position + 1) lands in
    // the MEMBER_OF field of the flags; the stored value is one-based, hence
    // the + 1.
    return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
                                                  << getFlagMemberOffset());
  }
8569 
8570   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8571                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8572     // If the entry is PTR_AND_OBJ but has not been marked with the special
8573     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8574     // marked as MEMBER_OF.
8575     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8576         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8577       return;
8578 
8579     // Reset the placeholder value to prepare the flag for the assignment of the
8580     // proper MEMBER_OF value.
8581     Flags &= ~OMP_MAP_MEMBER_OF;
8582     Flags |= MemberOfFlag;
8583   }
8584 
  /// Append to \a Layout the fields of \a RD — including those contributed by
  /// its non-virtual and virtual bases — ordered by their LLVM field index,
  /// i.e. in memory layout order.
  /// \param RD Record whose layout is being flattened; must not be a union.
  /// \param Layout Output vector the field declarations are appended to.
  /// \param AsBase True when \a RD is being laid out as a base subobject, in
  /// which case the base-subobject LLVM type determines the slot count.
  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    // One slot per element of the LLVM struct type; each slot will hold
    // either a base class or a field, keyed by its LLVM field index.
    unsigned NumElements = St->getNumElements();
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(Base)
                                 .getNonVirtualSize()
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      // A virtual base may already occupy its slot (e.g. when it was recorded
      // via another path); keep the first assignment.
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    // Walk the slots in layout order, recursing into base classes so their
    // fields land in \a Layout at the right position.
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }
8644 
8645 public:
8646   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8647       : CurDir(&Dir), CGF(CGF) {
8648     // Extract firstprivate clause information.
8649     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8650       for (const auto *D : C->varlists())
8651         FirstPrivateDecls.try_emplace(
8652             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8653     // Extract implicit firstprivates from uses_allocators clauses.
8654     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8655       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8656         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8657         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8658           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8659                                         /*Implicit=*/true);
8660         else if (const auto *VD = dyn_cast<VarDecl>(
8661                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8662                          ->getDecl()))
8663           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8664       }
8665     }
8666     // Extract device pointer clause information.
8667     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8668       for (auto L : C->component_lists())
8669         DevPointersMap[L.first].push_back(L.second);
8670   }
8671 
  /// Constructor for the declare mapper directive. No clause preprocessing is
  /// performed here; only the current directive is recorded (map clauses are
  /// walked later by generateAllInfoForMapper).
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
8675 
8676   /// Generate code for the combined entry if we have a partially mapped struct
8677   /// and take care of the mapping flags of the arguments corresponding to
8678   /// individual struct members.
8679   void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
8680                          MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
8681                          MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
8682                          const StructRangeInfoTy &PartialStruct) const {
8683     // Base is the base of the struct
8684     BasePointers.push_back(PartialStruct.Base.getPointer());
8685     // Pointer is the address of the lowest element
8686     llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
8687     Pointers.push_back(LB);
8688     // Size is (addr of {highest+1} element) - (addr of lowest element)
8689     llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
8690     llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
8691     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8692     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8693     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
8694     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8695                                                   /*isSigned=*/false);
8696     Sizes.push_back(Size);
8697     // Map type is always TARGET_PARAM
8698     Types.push_back(OMP_MAP_TARGET_PARAM);
8699     // Remove TARGET_PARAM flag from the first element
8700     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
8701 
8702     // All other current entries will be MEMBER_OF the combined entry
8703     // (except for PTR_AND_OBJ entries which do not have a placeholder value
8704     // 0xFFFF in the MEMBER_OF field).
8705     OpenMPOffloadMappingFlags MemberOfFlag =
8706         getMemberOfFlag(BasePointers.size() - 1);
8707     for (auto &M : CurTypes)
8708       setCorrectMemberOfFlag(M, MemberOfFlag);
8709   }
8710 
  /// Generate all the base pointers, section pointers, sizes and map
  /// types for the extracted mappable expressions. Also, for each item that
  /// relates with a device pointer, a pair of the relevant declaration and
  /// index where it occurs is appended to the device pointers info array.
  void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
                       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                       MapFlagsArrayTy &Types) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen = [&Info](
        const ValueDecl *D,
        OMPClauseMappableExprCommon::MappableExprComponentListRef L,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit) {
      // Entries are keyed on the canonical declaration; component lists
      // rooted at 'this' are keyed on nullptr.
      const ValueDecl *VD =
          D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
      Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
                            IsImplicit);
    };

    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    // Collect component lists from map clauses, and from to/from clauses
    // (recorded with map types OMPC_MAP_to/OMPC_MAP_from and no modifiers).
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }
    for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }
    for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;

    for (const auto *C :
         CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
      for (const auto L : C->component_lists()) {
        assert(!L.second.empty() && "Not expecting empty list of components!");
        // The base declaration is the one associated with the last component.
        const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = L.second.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto CI = std::find_if(
              It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
                return MI.Components.back().getAssociatedDeclaration() == VD;
              });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          if (CI != It->second.end()) {
            CI->ReturnDevicePointer = true;
            continue;
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
                  /*ReturnDevicePointer=*/false, C->isImplicit());
          DeferredInfo[nullptr].emplace_back(IE, VD);
        } else {
          // Non-member pointer: emit a RETURN_PARAM entry right away with a
          // zero-size section.
          llvm::Value *Ptr =
              CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
          BasePointers.emplace_back(Ptr, VD);
          Pointers.push_back(Ptr);
          Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
          Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
        }
      }
    }

    // Process each declaration's component lists. Results are accumulated
    // into temporary arrays so that, for a partially mapped struct, the
    // combined entry can be emitted into the output arrays before them.
    for (const auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = true;

      // Temporary versions of arrays
      MapBaseValuesArrayTy CurBasePointers;
      MapValuesArrayTy CurPointers;
      MapValuesArrayTy CurSizes;
      MapFlagsArrayTy CurTypes;
      StructRangeInfoTy PartialStruct;

      for (const MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");

        // Remember the current base pointer index.
        unsigned CurrentBasePointersIdx = CurBasePointers.size();
        generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
                                     CurBasePointers, CurPointers, CurSizes,
                                     CurTypes, PartialStruct,
                                     IsFirstComponentList, L.IsImplicit);

        // If this entry relates with a device pointer, set the relevant
        // declaration and add the 'return pointer' flag.
        if (L.ReturnDevicePointer) {
          assert(CurBasePointers.size() > CurrentBasePointersIdx &&
                 "Unexpected number of mapped base pointers.");

          const ValueDecl *RelevantVD =
              L.Components.back().getAssociatedDeclaration();
          assert(RelevantVD &&
                 "No relevant declaration related with device pointer??");

          CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
          CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
        }
        IsFirstComponentList = false;
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr.
      auto CI = DeferredInfo.find(M.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
          llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
              this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
          CurBasePointers.emplace_back(BasePtr, L.VD);
          CurPointers.push_back(Ptr);
          CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
          // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
          // value MEMBER_OF=FFFF so that the entry is later updated with the
          // correct value of MEMBER_OF.
          CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                             OMP_MAP_MEMBER_OF);
        }
      }

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
                          PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      Types.append(CurTypes.begin(), CurTypes.end());
    }
  }
8889 
  /// Generate all the base pointers, section pointers, sizes and map types for
  /// the extracted map clauses of user-defined mapper.
  void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers,
                                MapValuesArrayTy &Pointers,
                                MapValuesArrayTy &Sizes,
                                MapFlagsArrayTy &Types) const {
    assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
           "Expect a declare mapper directive");
    const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen = [&Info](
        const ValueDecl *D,
        OMPClauseMappableExprCommon::MappableExprComponentListRef L,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit) {
      // Entries are keyed on the canonical declaration; lists rooted at
      // 'this' are keyed on nullptr.
      const ValueDecl *VD =
          D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
      Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
                            IsImplicit);
    };

    // A declare mapper's clause list is expected to contain only map clauses
    // (the cast below enforces this).
    for (const auto *C : CurMapperDir->clauselists()) {
      const auto *MC = cast<OMPMapClause>(C);
      for (const auto L : MC->component_lists()) {
        InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(),
                /*ReturnDevicePointer=*/false, MC->isImplicit());
      }
    }

    // Process each declaration's component lists, accumulating into
    // temporary arrays so a combined struct entry can be emitted first when
    // the declaration's struct is only partially mapped.
    for (const auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = true;

      // Temporary versions of arrays
      MapBaseValuesArrayTy CurBasePointers;
      MapValuesArrayTy CurPointers;
      MapValuesArrayTy CurSizes;
      MapFlagsArrayTy CurTypes;
      StructRangeInfoTy PartialStruct;

      for (const MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");
        generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
                                     CurBasePointers, CurPointers, CurSizes,
                                     CurTypes, PartialStruct,
                                     IsFirstComponentList, L.IsImplicit);
        IsFirstComponentList = false;
      }

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
                          PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      Types.append(CurTypes.begin(), CurTypes.end());
    }
  }
8961 
  /// Emit capture info for lambdas for variables captured by reference.
  ///
  /// For a captured variable \a VD whose (non-reference) type is a lambda
  /// class, emits one PTR_AND_OBJ|LITERAL|MEMBER_OF|IMPLICIT entry for the
  /// captured 'this' (if any) and one for each by-reference or pointer-typed
  /// capture. \a LambdaPointers records capture-field address -> lambda
  /// object address so MEMBER_OF can be fixed up later by
  /// adjustMemberOfForLambdaCaptures.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
      MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
      MapFlagsArrayTy &Types,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    // Nothing to do unless VD's underlying record type is a lambda.
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    // Treat Arg as the address of the lambda object.
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      // Map the field holding the captured 'this': base is the field's
      // address, pointee is the stored 'this' value; size is pointer-sized.
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      BasePointers.push_back(ThisLVal.getPointer(CGF));
      Pointers.push_back(ThisLValVal.getPointer(CGF));
      Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      const VarDecl *VD = LC.getCapturedVar();
      // Only by-reference captures and pointer-typed by-copy captures are
      // mapped.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        // By-reference capture: pointee is the referenced storage, size is
        // that of the referenced (non-reference) type.
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        BasePointers.push_back(VarLVal.getPointer(CGF));
        Pointers.push_back(VarLValVal.getPointer(CGF));
        Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // By-copy pointer capture: pointee is the stored pointer value; a
        // zero size is used.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        BasePointers.push_back(VarLVal.getPointer(CGF));
        Pointers.push_back(VarRVal.getScalarVal());
        Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    }
  }
9025 
9026   /// Set correct indices for lambdas captures.
9027   void adjustMemberOfForLambdaCaptures(
9028       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
9029       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
9030       MapFlagsArrayTy &Types) const {
9031     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
9032       // Set correct member_of idx for all implicit lambda captures.
9033       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9034                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
9035         continue;
9036       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
9037       assert(BasePtr && "Unable to find base lambda address.");
9038       int TgtIdx = -1;
9039       for (unsigned J = I; J > 0; --J) {
9040         unsigned Idx = J - 1;
9041         if (Pointers[Idx] != BasePtr)
9042           continue;
9043         TgtIdx = Idx;
9044         break;
9045       }
9046       assert(TgtIdx != -1 && "Unable to find parent lambda.");
9047       // All other current entries will be MEMBER_OF the combined entry
9048       // (except for PTR_AND_OBJ entries which do not have a placeholder value
9049       // 0xFFFF in the MEMBER_OF field).
9050       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
9051       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
9052     }
9053   }
9054 
  /// Generate the base pointers, section pointers, sizes and map types
  /// associated to a given capture.
  ///
  /// Component lists mapping the same declaration may overlap (one mapping a
  /// whole struct region another one of its members); overlaps are detected,
  /// the overlapped sub-lists are sorted in field-layout order, and the
  /// overlapping lists are emitted before the non-overlapping ones.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg,
                              MapBaseValuesArrayTy &BasePointers,
                              MapValuesArrayTy &Pointers,
                              MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // 'this' captures are keyed on nullptr, matching how map info is stored.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      BasePointers.emplace_back(Arg, VD);
      Pointers.push_back(Arg);
      Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
      return;
    }

    // One MapData per map-clause component list for this declaration:
    // (components, map type, map-type modifiers, is-implicit).
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      for (const auto L : C->decl_component_lists(VD)) {
        assert(L.first == VD &&
               "We got information for the wrong declaration??");
        assert(!L.second.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(L.second, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit());
      }
    }

    // Find overlapping elements (including the offset from the base element).
    // Maps each "base" component list to the lists that extend it.
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ++Count;
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        // NOTE(review): this std::tie clobbers MapType/MapModifiers/IsImplicit
        // with L1's values; only Components1 is used below, so the outer
        // values are not needed past this point.
        std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
        // Walk both lists from the back (the base declaration end) while the
        // components agree.
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head of
        // the components list.
        if (CI == CE || SI == SE) {
          assert((CI != CE || SI != SE) &&
                 "Unexpected full match of the mapping components.");
          // The exhausted (shorter) list is the base; the other list is the
          // overlapped sub-component.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // Use the flattened layout (getPlainLayout) for C++ records so base
      // class fields are ordered too; plain records just use field order.
      if (const auto *CRD =
              VD->getType().getCanonicalType()->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      // Order overlapped component lists by the layout position of the first
      // field where they diverge; prefix lists sort before their extensions.
      llvm::sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            // Fields of different records: whichever appears first in the
            // flattened layout is "less".
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      // Each overlapped base list is emitted as a first component list.
      bool IsFirstComponentList = true;
      generateInfoForComponentList(MapType, MapModifiers, Components,
                                   BasePointers, Pointers, Sizes, Types,
                                   PartialStruct, IsFirstComponentList,
                                   IsImplicit, OverlappedComponents);
    }
    // Go through other elements without overlapped elements.
    // The first-component flag is only still true if no overlapped lists
    // were emitted above.
    bool IsFirstComponentList = OverlappedData.empty();
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, Components,
                                     BasePointers, Pointers, Sizes, Types,
                                     PartialStruct, IsFirstComponentList,
                                     IsImplicit);
      IsFirstComponentList = false;
    }
  }
9232 
9233   /// Generate the base pointers, section pointers, sizes and map types
9234   /// associated with the declare target link variables.
  void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
                                        MapValuesArrayTy &Pointers,
                                        MapValuesArrayTy &Sizes,
                                        MapFlagsArrayTy &Types) const {
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      for (const auto L : C->component_lists()) {
        // Skip component lists that are not rooted at a declaration.
        if (!L.first)
          continue;
        const auto *VD = dyn_cast<VarDecl>(L.first);
        if (!VD)
          continue;
        // Only emit info for variables explicitly marked 'declare target
        // link'; skip entirely when unified shared memory is required, since
        // the check below would otherwise select MT_Link variables.
        llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
            OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
        if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
            !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
          continue;
        StructRangeInfoTy PartialStruct;
        generateInfoForComponentList(
            C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
            Pointers, Sizes, Types, PartialStruct,
            /*IsFirstComponentList=*/true, C->isImplicit());
        // A 'declare target link' list item should never produce a partial
        // struct mapping; the assert documents that invariant.
        assert(!PartialStruct.Base.isValid() &&
               "No partial structs for declare target link expected.");
      }
    }
  }
9266 
9267   /// Generate the default map information for a given capture \a CI,
9268   /// record field declaration \a RI and captured value \a CV.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapBaseValuesArrayTy &CurBasePointers,
                              MapValuesArrayTy &CurPointers,
                              MapValuesArrayTy &CurSizes,
                              MapFlagsArrayTy &CurMapTypes) const {
    // Captures default to implicit maps; this may be overridden below when
    // the variable appears in FirstPrivateDecls.
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // 'this' capture: map the pointed-to object with size of the pointee
      // and the default 'tofrom' map type.
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CurSizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CurMapTypes.push_back(OMP_MAP_LITERAL);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CurMapTypes.push_back(OMP_MAP_NONE);
        CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      // An explicit firstprivate clause overrides the implicit flag.
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      // Capture by reference: the record field is a reference type; map the
      // referenced element with its full size.
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CurSizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end() &&
          VD->getType().isConstant(CGF.getContext())) {
        // Constant firstprivate variables get a dedicated global copy on the
        // device; register it and initialize it from the original variable.
        llvm::Constant *Addr =
            CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
        // Copy the value of the original variable to the new global copy.
        CGF.Builder.CreateMemCpy(
            CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
            Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
            CurSizes.back(), /*IsVolatile=*/false);
        // Use new global variable as the base pointers.
        CurBasePointers.push_back(Addr);
        CurPointers.push_back(Addr);
      } else {
        CurBasePointers.push_back(CV);
        if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
          // Firstprivate pointer: load the pointer value through the
          // reference so the runtime receives the pointee address.
          Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
              CV, ElementType, CGF.getContext().getDeclAlign(VD),
              AlignmentSource::Decl));
          CurPointers.push_back(PtrAddr.getPointer());
        } else {
          CurPointers.push_back(CV);
        }
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CurMapTypes.back() |= OMP_MAP_IMPLICIT;
  }
9350 };
9351 } // anonymous namespace
9352 
9353 /// Emit the arrays used to pass the captures and map information to the
9354 /// offloading runtime library. If there is no map or capture information,
9355 /// return nullptr by reference.
static void
emitOffloadingArrays(CodeGenFunction &CGF,
                     MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
                     MappableExprsHandler::MapValuesArrayTy &Pointers,
                     MappableExprsHandler::MapValuesArrayTy &Sizes,
                     MappableExprsHandler::MapFlagsArrayTy &MapTypes,
                     CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    // The base-pointer and pointer arrays are always stack temporaries
    // (filled per entry in the loop below).
    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      // Runtime-evaluated sizes: allocate a stack array, stores are emitted
      // in the fill loop below.
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (llvm::Value *S : Sizes)
        ConstSizes.push_back(cast<llvm::Constant>(S));

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
    llvm::copy(MapTypes, Mapping.begin());
    llvm::Constant *MapTypesArrayInit =
        llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
        CGM.getModule(), MapTypesArrayInit->getType(),
        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
        MapTypesArrayInit, MaptypesName);
    MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
    Info.MapTypesArray = MapTypesArrayGbl;

    // Fill the base-pointer and pointer arrays (and the sizes array when it
    // is not a constant global), one store per mapped entry.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      // Cast the slot to the stored value's pointer type before storing.
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Record the array slot address for entries associated with a
      // use_device_ptr declaration, if the caller requested it.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(
            CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true),
            SAddr);
      }
    }
  }
}
9471 
9472 /// Emit the arguments to be passed to the runtime library based on the
9473 /// arrays of pointers, sizes and map types.
static void emitOffloadingArraysArgument(
    CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
    llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
    llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  if (Info.NumberOfPtrs) {
    // Decay each array to a pointer to its first element (GEP 0,0) so it can
    // be passed directly to the offloading runtime entry points.
    BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.BasePointersArray,
        /*Idx0=*/0, /*Idx1=*/0);
    PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
    SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
        /*Idx0=*/0, /*Idx1=*/0);
    MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
        Info.MapTypesArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
  } else {
    // No mapped entries: pass null pointers of the expected argument types.
    BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapTypesArrayArg =
        llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
  }
}
9505 
9506 /// Check for inner distribute directive.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  // Look at the single statement nested inside the directive's innermost
  // captured statement (ignoring capture/container wrappers).
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // 'target' may contain a distribute directly, or a 'teams' region
      // that itself wraps a distribute one level deeper.
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      // 'target teams' can only wrap the distribute directly.
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      // These combined target forms cannot contain a nested distribute.
      return nullptr;
    // All remaining directive kinds either already include 'distribute' in
    // their combined form or are not target directives at all, so reaching
    // here with one of them is a caller bug.
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}
9611 
9612 /// Emit the user-defined mapper function. The code generation follows the
9613 /// pattern in the example below.
9614 /// \code
9615 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9616 ///                                           void *base, void *begin,
9617 ///                                           int64_t size, int64_t type) {
9618 ///   // Allocate space for an array section first.
9619 ///   if (size > 1 && !maptype.IsDelete)
9620 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9621 ///                                 size*sizeof(Ty), clearToFrom(type));
9622 ///   // Map members.
9623 ///   for (unsigned i = 0; i < size; i++) {
9624 ///     // For each component specified by this mapper:
9625 ///     for (auto c : all_components) {
9626 ///       if (c.hasMapper())
9627 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9628 ///                       c.arg_type);
9629 ///       else
9630 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9631 ///                                     c.arg_begin, c.arg_size, c.arg_type);
9632 ///     }
9633 ///   }
9634 ///   // Delete the array section.
9635 ///   if (size > 1 && maptype.IsDelete)
9636 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9637 ///                                 size*sizeof(Ty), clearToFrom(type));
9638 /// }
9639 /// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Emit each mapper function at most once; UDMMap caches emitted mappers.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes. The signature matches
  // the runtime's expected mapper callback:
  //   (void *rt_mapper_handle, void *base, void *begin,
  //    int64_t size, int64_t type).
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // Name the function ".omp_mapper.<mangled type>.<mapper name>".
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
      CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initialization if this is an array section and \p MapType
  // indicates that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block. The current-element pointer is a PHI whose
  // back-edge incoming value (PtrNext) is added after the body is emitted.
  MapperCGF.EmitBlock(BodyBB);
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
    return MapperCGF
        .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
        .getAddress(MapperCGF);
  });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
  MappableExprsHandler::MapValuesArrayTy Pointers;
  MappableExprsHandler::MapValuesArrayTy Sizes;
  MappableExprsHandler::MapFlagsArrayTy MapTypes;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__tgt_mapper_num_components), OffloadingArgs);
  // Shift the count into the MEMBER_OF bit-field position so it can be added
  // to each component's map type below.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Sizes[I];

    // Extract the MEMBER_OF field from the map type.
    llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
    MapperCGF.EmitBlock(MemberBB);
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]);
    llvm::Value *Member = MapperCGF.Builder.CreateAnd(
        OriMapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
    llvm::BasicBlock *MemberCombineBB =
        MapperCGF.createBasicBlock("omp.member.combine");
    llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
    llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
    MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
    // Add the number of pre-existing components to the MEMBER_OF field if it
    // is valid.
    MapperCGF.EmitBlock(MemberCombineBB);
    llvm::Value *CombinedMember =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
    // Do nothing if it is not a member of previous components.
    MapperCGF.EmitBlock(TypeBB);
    llvm::PHINode *MemberMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
    MemberMapType->addIncoming(OriMapType, MemberBB);
    MemberMapType->addIncoming(CombinedMember, MemberCombineBB);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    // Merge the four possible map types (alloc/to/from/unchanged-tofrom).
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    // TODO: call the corresponding mapper function if a user-defined mapper is
    // associated with this map clause.
    // Call the runtime API __tgt_push_mapper_component to fill up the runtime
    // data structure.
    llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType};
    MapperCGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__tgt_push_mapper_component),
        OffloadingArgs);
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, BodyBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Cache the mapper, and remember it per-function so the caller can emit
  // any deferred registration.
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
9889 
/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
  // Suffix used in basic-block and value names so the init path and the
  // delete path are distinguishable in the generated IR.
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section. A non-array element (Size < 1)
  // needs no allocation/deletion handling, so branch directly to \p ExitBB.
  llvm::BasicBlock *IsDeleteBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"}));
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);

  // Evaluate if we are going to delete this section.
  MapperCGF.EmitBlock(IsDeleteBB);
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  if (IsInit) {
    // Initialization code runs only when the delete bit is NOT set.
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    // Deletion code runs only when the delete bit IS set.
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
  MapperCGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__tgt_push_mapper_component), OffloadingArgs);
}
9943 
9944 void CGOpenMPRuntime::emitTargetNumIterationsCall(
9945     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9946     llvm::Value *DeviceID,
9947     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9948                                      const OMPLoopDirective &D)>
9949         SizeEmitter) {
9950   OpenMPDirectiveKind Kind = D.getDirectiveKind();
9951   const OMPExecutableDirective *TD = &D;
9952   // Get nested teams distribute kind directive, if any.
9953   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
9954     TD = getNestedDistributeDirective(CGM.getContext(), D);
9955   if (!TD)
9956     return;
9957   const auto *LD = cast<OMPLoopDirective>(TD);
9958   auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF,
9959                                                      PrePostActionTy &) {
9960     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
9961       llvm::Value *Args[] = {DeviceID, NumIterations};
9962       CGF.EmitRuntimeCall(
9963           createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
9964     }
9965   };
9966   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
9967 }
9968 
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // A depend clause requires the target region to be launched from an outer
  // task; in that case the captured variables must be regenerated inside the
  // task region before each use (see the CapturedVars.clear() calls below).
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // InputInfo and MapTypesArray are filled in by TargetThenGen below and are
  // read by ThenGen when the offloading call is finally emitted.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      if (RequiresOuterTask) {
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      // The runtime expects a signed 64-bit device number.
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
                                          : OMPRTL__tgt_target_teams),
          OffloadingArgs);
    } else {
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
                                          : OMPRTL__tgt_target),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Compute the map information for every capture, materialize the offloading
  // arrays, and then run ThenGen (inlined, or wrapped in a task if needed).
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;

    // Walk the captures, the captured-record fields, and the captured values
    // in lockstep.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
      MappableExprsHandler::MapValuesArrayTy CurPointers;
      MappableExprsHandler::MapValuesArrayTy CurSizes;
      MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurBasePointers.push_back(*CV);
        CurPointers.push_back(*CV);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                              MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                              MappableExprsHandler::OMP_MAP_IMPLICIT);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
                                         CurSizes, CurMapTypes, PartialStruct);
        if (CurBasePointers.empty())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
                                           CurPointers, CurSizes, CurMapTypes);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(
              CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
              CurMapTypes, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert(!CurBasePointers.empty() &&
             "Non-existing map pointer for capture!");
      assert(CurBasePointers.size() == CurPointers.size() &&
             CurBasePointers.size() == CurSizes.size() &&
             CurBasePointers.size() == CurMapTypes.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
                                    CurMapTypes, PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
                                              Pointers, MapTypes);
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
                                               MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Publish the arrays to the enclosing scope's InputInfo/MapTypesArray so
    // ThenGen can emit the actual offloading call.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user does not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
10260 
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    // The (device, file, line) triple uniquely identifies this target region
    // entry across host and device compilations.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the device-function emitter matching the exact combined
    // directive kind.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // None of the remaining directive kinds can reach this switch: the
    // RequiresDeviceCodegen check above guarantees a target execution
    // directive. Listing them keeps the switch exhaustive so adding a new
    // directive kind triggers a compiler warning here.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // A non-target executable directive cannot itself be an entry point, but
  // its associated statement may contain nested target regions.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(
        E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
10408 
10409 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10410   // If emitting code for the host, we do not process FD here. Instead we do
10411   // the normal code generation.
10412   if (!CGM.getLangOpts().OpenMPIsDevice) {
10413     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
10414       Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10415           OMPDeclareTargetDeclAttr::getDeviceType(FD);
10416       // Do not emit device_type(nohost) functions for the host.
10417       if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10418         return true;
10419     }
10420     return false;
10421   }
10422 
10423   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10424   // Try to detect target regions in the function.
10425   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10426     StringRef Name = CGM.getMangledName(GD);
10427     scanForTargetRegionsFunctions(FD->getBody(), Name);
10428     Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10429         OMPDeclareTargetDeclAttr::getDeviceType(FD);
10430     // Do not emit device_type(nohost) functions for the host.
10431     if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10432       return true;
10433   }
10434 
10435   // Do not to emit function if it is not marked as declare target.
10436   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10437          AlreadyEmittedTargetDecls.count(VD) == 0;
10438 }
10439 
10440 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10441   if (!CGM.getLangOpts().OpenMPIsDevice)
10442     return false;
10443 
10444   // Check if there are Ctors/Dtors in this declaration and look for target
10445   // regions in it. We use the complete variant to produce the kernel name
10446   // mangling.
10447   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10448   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10449     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10450       StringRef ParentName =
10451           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10452       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10453     }
10454     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10455       StringRef ParentName =
10456           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10457       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10458     }
10459   }
10460 
10461   // Do not to emit variable if it is not marked as declare target.
10462   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10463       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10464           cast<VarDecl>(GD.getDecl()));
10465   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10466       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10467        HasRequiresUnifiedSharedMemory)) {
10468     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10469     return true;
10470   }
10471   return false;
10472 }
10473 
10474 llvm::Constant *
10475 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
10476                                                 const VarDecl *VD) {
10477   assert(VD->getType().isConstant(CGM.getContext()) &&
10478          "Expected constant variable.");
10479   StringRef VarName;
10480   llvm::Constant *Addr;
10481   llvm::GlobalValue::LinkageTypes Linkage;
10482   QualType Ty = VD->getType();
10483   SmallString<128> Buffer;
10484   {
10485     unsigned DeviceID;
10486     unsigned FileID;
10487     unsigned Line;
10488     getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
10489                              FileID, Line);
10490     llvm::raw_svector_ostream OS(Buffer);
10491     OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
10492        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
10493     VarName = OS.str();
10494   }
10495   Linkage = llvm::GlobalValue::InternalLinkage;
10496   Addr =
10497       getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
10498                                   getDefaultFirstprivateAddressSpace());
10499   cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
10500   CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
10501   CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
10502   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10503       VarName, Addr, VarSize,
10504       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
10505   return Addr;
10506 }
10507 
10508 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10509                                                    llvm::Constant *Addr) {
10510   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10511       !CGM.getLangOpts().OpenMPIsDevice)
10512     return;
10513   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10514       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10515   if (!Res) {
10516     if (CGM.getLangOpts().OpenMPIsDevice) {
10517       // Register non-target variables being emitted in device code (debug info
10518       // may cause this).
10519       StringRef VarName = CGM.getMangledName(VD);
10520       EmittedNonTargetVariables.try_emplace(VarName, Addr);
10521     }
10522     return;
10523   }
10524   // Register declare target variables.
10525   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
10526   StringRef VarName;
10527   CharUnits VarSize;
10528   llvm::GlobalValue::LinkageTypes Linkage;
10529 
10530   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10531       !HasRequiresUnifiedSharedMemory) {
10532     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10533     VarName = CGM.getMangledName(VD);
10534     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
10535       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
10536       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
10537     } else {
10538       VarSize = CharUnits::Zero();
10539     }
10540     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
10541     // Temp solution to prevent optimizations of the internal variables.
10542     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
10543       std::string RefName = getName({VarName, "ref"});
10544       if (!CGM.GetGlobalValue(RefName)) {
10545         llvm::Constant *AddrRef =
10546             getOrCreateInternalVariable(Addr->getType(), RefName);
10547         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
10548         GVAddrRef->setConstant(/*Val=*/true);
10549         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
10550         GVAddrRef->setInitializer(Addr);
10551         CGM.addCompilerUsedGlobal(GVAddrRef);
10552       }
10553     }
10554   } else {
10555     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
10556             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10557              HasRequiresUnifiedSharedMemory)) &&
10558            "Declare target attribute must link or to with unified memory.");
10559     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
10560       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
10561     else
10562       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10563 
10564     if (CGM.getLangOpts().OpenMPIsDevice) {
10565       VarName = Addr->getName();
10566       Addr = nullptr;
10567     } else {
10568       VarName = getAddrOfDeclareTargetVar(VD).getName();
10569       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
10570     }
10571     VarSize = CGM.getPointerSize();
10572     Linkage = llvm::GlobalValue::WeakAnyLinkage;
10573   }
10574 
10575   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10576       VarName, Addr, VarSize, Flags, Linkage);
10577 }
10578 
10579 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10580   if (isa<FunctionDecl>(GD.getDecl()) ||
10581       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10582     return emitTargetFunctions(GD);
10583 
10584   return emitTargetGlobalVariable(GD);
10585 }
10586 
void CGOpenMPRuntime::emitDeferredTargetDecls() const {
  // Emits the declare-target variables whose emission was deferred (presumably
  // until the 'requires unified_shared_memory' state was known -- see the
  // DeferredGlobalVariables insertions).
  for (const VarDecl *VD : DeferredGlobalVariables) {
    llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
    if (!Res)
      continue;
    if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
        !HasRequiresUnifiedSharedMemory) {
      // 'to' without unified shared memory: emit the variable itself.
      CGM.EmitGlobal(VD);
    } else {
      // 'link', or 'to' with unified shared memory: only materialize the
      // declare-target pointer variable; the result is intentionally ignored.
      assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory)) &&
             "Expected link clause or to clause with unified memory.");
      (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
    }
  }
}
10605 
10606 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10607     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10608   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10609          " Expected target-based directive.");
10610 }
10611 
10612 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10613   for (const OMPClause *Clause : D->clauselists()) {
10614     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10615       HasRequiresUnifiedSharedMemory = true;
10616     } else if (const auto *AC =
10617                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10618       switch (AC->getAtomicDefaultMemOrderKind()) {
10619       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10620         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10621         break;
10622       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10623         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10624         break;
10625       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10626         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10627         break;
10628       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10629         break;
10630       }
10631     }
10632   }
10633 }
10634 
/// Returns the atomic ordering selected by an 'atomic_default_mem_order'
/// clause of a 'requires' directive (see processRequiresDirective), or the
/// previously recorded value when no such clause was seen.
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
10638 
10639 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10640                                                        LangAS &AS) {
10641   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10642     return false;
10643   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10644   switch(A->getAllocatorType()) {
10645   case OMPAllocateDeclAttr::OMPNullMemAlloc:
10646   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10647   // Not supported, fallback to the default mem space.
10648   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10649   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10650   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10651   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10652   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10653   case OMPAllocateDeclAttr::OMPConstMemAlloc:
10654   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10655     AS = LangAS::Default;
10656     return true;
10657   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10658     llvm_unreachable("Expected predefined allocator for the variables with the "
10659                      "static storage.");
10660   }
10661   return false;
10662 }
10663 
/// Returns whether a '#pragma omp requires unified_shared_memory' directive
/// was seen (recorded by processRequiresDirective).
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
10667 
10668 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10669     CodeGenModule &CGM)
10670     : CGM(CGM) {
10671   if (CGM.getLangOpts().OpenMPIsDevice) {
10672     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10673     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10674   }
10675 }
10676 
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  // Restore the flag saved by the constructor (device compilations only).
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}
10681 
bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  // NOTE(review): from the code below, a 'true' result appears to mean "no
  // (further) emission needed" and 'false' "still needs emission" -- confirm
  // against callers.
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target, as it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      // If a module-level symbol already exists, emission is only still needed
      // when that symbol is a bare declaration without a body.
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  // Record the function as handled; a failed insert means it was seen before.
  return !AlreadyEmittedTargetDecls.insert(D).second;
}
10701 
/// Builds the host-side registration function that reports the accumulated
/// 'requires' flags to the runtime via __tgt_register_requires; returns
/// nullptr when no registration is needed.
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    // A scoped CodeGenFunction emits the tiny void() registration body.
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires),
        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}
10742 
10743 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10744                                     const OMPExecutableDirective &D,
10745                                     SourceLocation Loc,
10746                                     llvm::Function *OutlinedFn,
10747                                     ArrayRef<llvm::Value *> CapturedVars) {
10748   if (!CGF.HaveInsertPoint())
10749     return;
10750 
10751   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10752   CodeGenFunction::RunCleanupsScope Scope(CGF);
10753 
10754   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10755   llvm::Value *Args[] = {
10756       RTLoc,
10757       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10758       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10759   llvm::SmallVector<llvm::Value *, 16> RealArgs;
10760   RealArgs.append(std::begin(Args), std::end(Args));
10761   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10762 
10763   llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
10764   CGF.EmitRuntimeCall(RTLFn, RealArgs);
10765 }
10766 
/// Emits a __kmpc_push_num_teams call with the evaluated 'num_teams' and
/// 'thread_limit' clause values; an absent clause expression is passed as 0.
void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  // Evaluate the clause expression (truncated/extended to i32), or use 0 when
  // the clause is absent.
  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
                      PushNumTeamsArgs);
}
10794 
/// Emits the runtime bracket for an OpenMP 'target data' region:
/// __tgt_target_data_begin before the body, __tgt_target_data_end after it,
/// plus the offloading argument arrays, honoring the optional 'if' and
/// 'device' clauses.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MCHandler(D, CGF);
    MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any; otherwise use the "undefined device" marker.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin),
                        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any; otherwise use the "undefined device" marker.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end),
                        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
10921 
/// Emits the runtime call for a standalone target data directive ('target
/// enter data', 'target exit data', or 'target update'), honoring the 'if',
/// 'device', 'nowait', and 'depend' clauses.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // InputInfo/MapTypesArray are filled in by TargetThenGen below and read by
  // ThenGen, so they are captured by reference.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo,
                    &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any; otherwise use the "undefined device" marker.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    llvm::Value *OffloadingArgs[] = {DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray};

    // Select the right runtime function call for each expected standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    OpenMPRTLFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
                        : OMPRTL__tgt_target_data_begin;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
                        : OMPRTL__tgt_target_data_end;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
                        : OMPRTL__tgt_target_data_update;
      break;
    // All other directive kinds cannot reach here (see the assert above); the
    // exhaustive list keeps -Wswitch coverage for new directives.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Publish the array pointers for ThenGen through the captured locals.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    // With a 'depend' clause the runtime call becomes part of a task.
    if (D.hasClausesOfKind<OMPDependClause>())
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
11084 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    /// How the parameter participates in vectorization; defaults to Vector.
    ParamKindTy Kind = Vector;
    /// Linear stride or clause argument value, where applicable.
    llvm::APSInt StrideOrArg;
    /// Requested alignment; a zero/empty value is treated as "not specified"
    /// by the mangling code below.
    llvm::APSInt Alignment;
  };
} // namespace
11095 
11096 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11097                                 ArrayRef<ParamAttrTy> ParamAttrs) {
11098   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11099   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11100   // of that clause. The VLEN value must be power of 2.
11101   // In other case the notion of the function`s "characteristic data type" (CDT)
11102   // is used to compute the vector length.
11103   // CDT is defined in the following order:
11104   //   a) For non-void function, the CDT is the return type.
11105   //   b) If the function has any non-uniform, non-linear parameters, then the
11106   //   CDT is the type of the first such parameter.
11107   //   c) If the CDT determined by a) or b) above is struct, union, or class
11108   //   type which is pass-by-value (except for the type that maps to the
11109   //   built-in complex data type), the characteristic data type is int.
11110   //   d) If none of the above three cases is applicable, the CDT is int.
11111   // The VLEN is then determined based on the CDT and the size of vector
11112   // register of that ISA for which current vector version is generated. The
11113   // VLEN is computed using the formula below:
11114   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
11115   // where vector register size specified in section 3.2.1 Registers and the
11116   // Stack Frame of original AMD64 ABI document.
11117   QualType RetType = FD->getReturnType();
11118   if (RetType.isNull())
11119     return 0;
11120   ASTContext &C = FD->getASTContext();
11121   QualType CDT;
11122   if (!RetType.isNull() && !RetType->isVoidType()) {
11123     CDT = RetType;
11124   } else {
11125     unsigned Offset = 0;
11126     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11127       if (ParamAttrs[Offset].Kind == Vector)
11128         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11129       ++Offset;
11130     }
11131     if (CDT.isNull()) {
11132       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11133         if (ParamAttrs[I + Offset].Kind == Vector) {
11134           CDT = FD->getParamDecl(I)->getType();
11135           break;
11136         }
11137       }
11138     }
11139   }
11140   if (CDT.isNull())
11141     CDT = C.IntTy;
11142   CDT = CDT->getCanonicalTypeUnqualified();
11143   if (CDT->isRecordType() || CDT->isUnionType())
11144     CDT = C.IntTy;
11145   return C.getTypeSize(CDT);
11146 }
11147 
static void
emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
                           const llvm::APSInt &VLENVal,
                           ArrayRef<ParamAttrTy> ParamAttrs,
                           OMPDeclareSimdDeclAttr::BranchStateTy State) {
  // Attaches mangled vector-variant names of the form
  // "_ZGV<isa><mask><vlen><params>_<name>" as function attributes, one per
  // (mask, ISA) combination.
  struct ISADataTy {
    char ISA;            // ISA mangling letter.
    unsigned VecRegSize; // Vector register width in bits.
  };
  ISADataTy ISAData[] = {
      {
          'b', 128
      }, // SSE
      {
          'c', 256
      }, // AVX
      {
          'd', 256
      }, // AVX2
      {
          'e', 512
      }, // AVX512
  };
  // 'N' for notinbranch, 'M' for inbranch; BS_Undefined emits both variants.
  llvm::SmallVector<char, 2> Masked;
  switch (State) {
  case OMPDeclareSimdDeclAttr::BS_Undefined:
    Masked.push_back('N');
    Masked.push_back('M');
    break;
  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
    Masked.push_back('N');
    break;
  case OMPDeclareSimdDeclAttr::BS_Inbranch:
    Masked.push_back('M');
    break;
  }
  for (char Mask : Masked) {
    for (const ISADataTy &Data : ISAData) {
      SmallString<256> Buffer;
      llvm::raw_svector_ostream Out(Buffer);
      Out << "_ZGV" << Data.ISA << Mask;
      if (!VLENVal) {
        // No simdlen clause: derive VLEN from register width and the
        // characteristic data type size.
        unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
        assert(NumElts && "Non-zero simdlen/cdtsize expected");
        Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
      } else {
        Out << VLENVal;
      }
      // Encode each parameter: s<stride> / l[<stride>] / u / v, optionally
      // followed by a<alignment>.
      for (const ParamAttrTy &ParamAttr : ParamAttrs) {
        switch (ParamAttr.Kind){
        case LinearWithVarStride:
          Out << 's' << ParamAttr.StrideOrArg;
          break;
        case Linear:
          Out << 'l';
          if (ParamAttr.StrideOrArg != 1)
            Out << ParamAttr.StrideOrArg;
          break;
        case Uniform:
          Out << 'u';
          break;
        case Vector:
          Out << 'v';
          break;
        }
        if (!!ParamAttr.Alignment)
          Out << 'a' << ParamAttr.Alignment;
      }
      Out << '_' << Fn->getName();
      Fn->addFnAttr(Out.str());
    }
  }
}
11221 
// These are the functions needed to mangle the names of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11227 
11228 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11229 ///
11230 /// TODO: Need to implement the behavior for reference marked with a
11231 /// var or no linear modifiers (1.b in the section). For this, we
11232 /// need to extend ParamKindTy to support the linear modifiers.
11233 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11234   QT = QT.getCanonicalType();
11235 
11236   if (QT->isVoidType())
11237     return false;
11238 
11239   if (Kind == ParamKindTy::Uniform)
11240     return false;
11241 
11242   if (Kind == ParamKindTy::Linear)
11243     return false;
11244 
11245   // TODO: Handle linear references with modifiers
11246 
11247   if (Kind == ParamKindTy::LinearWithVarStride)
11248     return false;
11249 
11250   return true;
11251 }
11252 
11253 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11254 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11255   QT = QT.getCanonicalType();
11256   unsigned Size = C.getTypeSize(QT);
11257 
11258   // Only scalars and complex within 16 bytes wide set PVB to true.
11259   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11260     return false;
11261 
11262   if (QT->isFloatingType())
11263     return true;
11264 
11265   if (QT->isIntegerType())
11266     return true;
11267 
11268   if (QT->isPointerType())
11269     return true;
11270 
11271   // TODO: Add support for complex types (section 3.1.2, item 2).
11272 
11273   return false;
11274 }
11275 
11276 /// Computes the lane size (LS) of a return type or of an input parameter,
11277 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11278 /// TODO: Add support for references, section 3.2.1, item 1.
11279 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11280   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11281     QualType PTy = QT.getCanonicalType()->getPointeeType();
11282     if (getAArch64PBV(PTy, C))
11283       return C.getTypeSize(PTy);
11284   }
11285   if (getAArch64PBV(QT, C))
11286     return C.getTypeSize(QT);
11287 
11288   return C.getTypeSize(C.getUIntPtrType());
11289 }
11290 
11291 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11292 // signature of the scalar function, as defined in 3.2.2 of the
11293 // AAVFABI.
11294 static std::tuple<unsigned, unsigned, bool>
11295 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11296   QualType RetType = FD->getReturnType().getCanonicalType();
11297 
11298   ASTContext &C = FD->getASTContext();
11299 
11300   bool OutputBecomesInput = false;
11301 
11302   llvm::SmallVector<unsigned, 8> Sizes;
11303   if (!RetType->isVoidType()) {
11304     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11305     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11306       OutputBecomesInput = true;
11307   }
11308   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11309     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11310     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11311   }
11312 
11313   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11314   // The LS of a function parameter / return value can only be a power
11315   // of 2, starting from 8 bits, up to 128.
11316   assert(std::all_of(Sizes.begin(), Sizes.end(),
11317                      [](unsigned Size) {
11318                        return Size == 8 || Size == 16 || Size == 32 ||
11319                               Size == 64 || Size == 128;
11320                      }) &&
11321          "Invalid size");
11322 
11323   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11324                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
11325                          OutputBecomesInput);
11326 }
11327 
11328 /// Mangle the parameter part of the vector function name according to
11329 /// their OpenMP classification. The mangling function is defined in
11330 /// section 3.5 of the AAVFABI.
11331 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11332   SmallString<256> Buffer;
11333   llvm::raw_svector_ostream Out(Buffer);
11334   for (const auto &ParamAttr : ParamAttrs) {
11335     switch (ParamAttr.Kind) {
11336     case LinearWithVarStride:
11337       Out << "ls" << ParamAttr.StrideOrArg;
11338       break;
11339     case Linear:
11340       Out << 'l';
11341       // Don't print the step value if it is not present or if it is
11342       // equal to 1.
11343       if (ParamAttr.StrideOrArg != 1)
11344         Out << ParamAttr.StrideOrArg;
11345       break;
11346     case Uniform:
11347       Out << 'u';
11348       break;
11349     case Vector:
11350       Out << 'v';
11351       break;
11352     }
11353 
11354     if (!!ParamAttr.Alignment)
11355       Out << 'a' << ParamAttr.Alignment;
11356   }
11357 
11358   return std::string(Out.str());
11359 }
11360 
11361 // Function used to add the attribute. The parameter `VLEN` is
11362 // templated to allow the use of "x" when targeting scalable functions
11363 // for SVE.
11364 template <typename T>
11365 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11366                                  char ISA, StringRef ParSeq,
11367                                  StringRef MangledName, bool OutputBecomesInput,
11368                                  llvm::Function *Fn) {
11369   SmallString<256> Buffer;
11370   llvm::raw_svector_ostream Out(Buffer);
11371   Out << Prefix << ISA << LMask << VLEN;
11372   if (OutputBecomesInput)
11373     Out << "v";
11374   Out << ParSeq << "_" << MangledName;
11375   Fn->addFnAttr(Out.str());
11376 }
11377 
11378 // Helper function to generate the Advanced SIMD names depending on
11379 // the value of the NDS when simdlen is not present.
11380 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11381                                       StringRef Prefix, char ISA,
11382                                       StringRef ParSeq, StringRef MangledName,
11383                                       bool OutputBecomesInput,
11384                                       llvm::Function *Fn) {
11385   switch (NDS) {
11386   case 8:
11387     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11388                          OutputBecomesInput, Fn);
11389     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11390                          OutputBecomesInput, Fn);
11391     break;
11392   case 16:
11393     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11394                          OutputBecomesInput, Fn);
11395     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11396                          OutputBecomesInput, Fn);
11397     break;
11398   case 32:
11399     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11400                          OutputBecomesInput, Fn);
11401     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11402                          OutputBecomesInput, Fn);
11403     break;
11404   case 64:
11405   case 128:
11406     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11407                          OutputBecomesInput, Fn);
11408     break;
11409   default:
11410     llvm_unreachable("Scalar type is too wide.");
11411   }
11412 }
11413 
/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
/// For every admissible vector length, a "_ZGV"-prefixed mangled name is
/// attached to \p Fn as a function attribute. \p ISA is 'n' for Advanced
/// SIMD and 's' for SVE; \p UserVLEN is the value of the `simdlen` clause
/// (0 when absent). Invalid user input is diagnosed at \p SLoc and no
/// attribute is emitted.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature: narrowest/widest
  // data size and whether the output also counts as an input.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures.
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits: total bit width in (0, 2048] and a multiple of 128.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence (the per-parameter token string of the
  // mangled name, section 3.5 of the AAVFABI).
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause: unmasked ("N"), masked ("M"), or both.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1: scalable vector length, marked "x".
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`; the lengths themselves are derived from the NDS.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
11522 
11523 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11524                                               llvm::Function *Fn) {
11525   ASTContext &C = CGM.getContext();
11526   FD = FD->getMostRecentDecl();
11527   // Map params to their positions in function decl.
11528   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11529   if (isa<CXXMethodDecl>(FD))
11530     ParamPositions.try_emplace(FD, 0);
11531   unsigned ParamPos = ParamPositions.size();
11532   for (const ParmVarDecl *P : FD->parameters()) {
11533     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11534     ++ParamPos;
11535   }
11536   while (FD) {
11537     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11538       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11539       // Mark uniform parameters.
11540       for (const Expr *E : Attr->uniforms()) {
11541         E = E->IgnoreParenImpCasts();
11542         unsigned Pos;
11543         if (isa<CXXThisExpr>(E)) {
11544           Pos = ParamPositions[FD];
11545         } else {
11546           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11547                                 ->getCanonicalDecl();
11548           Pos = ParamPositions[PVD];
11549         }
11550         ParamAttrs[Pos].Kind = Uniform;
11551       }
11552       // Get alignment info.
11553       auto NI = Attr->alignments_begin();
11554       for (const Expr *E : Attr->aligneds()) {
11555         E = E->IgnoreParenImpCasts();
11556         unsigned Pos;
11557         QualType ParmTy;
11558         if (isa<CXXThisExpr>(E)) {
11559           Pos = ParamPositions[FD];
11560           ParmTy = E->getType();
11561         } else {
11562           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11563                                 ->getCanonicalDecl();
11564           Pos = ParamPositions[PVD];
11565           ParmTy = PVD->getType();
11566         }
11567         ParamAttrs[Pos].Alignment =
11568             (*NI)
11569                 ? (*NI)->EvaluateKnownConstInt(C)
11570                 : llvm::APSInt::getUnsigned(
11571                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11572                           .getQuantity());
11573         ++NI;
11574       }
11575       // Mark linear parameters.
11576       auto SI = Attr->steps_begin();
11577       auto MI = Attr->modifiers_begin();
11578       for (const Expr *E : Attr->linears()) {
11579         E = E->IgnoreParenImpCasts();
11580         unsigned Pos;
11581         // Rescaling factor needed to compute the linear parameter
11582         // value in the mangled name.
11583         unsigned PtrRescalingFactor = 1;
11584         if (isa<CXXThisExpr>(E)) {
11585           Pos = ParamPositions[FD];
11586         } else {
11587           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11588                                 ->getCanonicalDecl();
11589           Pos = ParamPositions[PVD];
11590           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11591             PtrRescalingFactor = CGM.getContext()
11592                                      .getTypeSizeInChars(P->getPointeeType())
11593                                      .getQuantity();
11594         }
11595         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11596         ParamAttr.Kind = Linear;
11597         // Assuming a stride of 1, for `linear` without modifiers.
11598         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11599         if (*SI) {
11600           Expr::EvalResult Result;
11601           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11602             if (const auto *DRE =
11603                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11604               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
11605                 ParamAttr.Kind = LinearWithVarStride;
11606                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
11607                     ParamPositions[StridePVD->getCanonicalDecl()]);
11608               }
11609             }
11610           } else {
11611             ParamAttr.StrideOrArg = Result.Val.getInt();
11612           }
11613         }
11614         // If we are using a linear clause on a pointer, we need to
11615         // rescale the value of linear_step with the byte size of the
11616         // pointee type.
11617         if (Linear == ParamAttr.Kind)
11618           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11619         ++SI;
11620         ++MI;
11621       }
11622       llvm::APSInt VLENVal;
11623       SourceLocation ExprLoc;
11624       const Expr *VLENExpr = Attr->getSimdlen();
11625       if (VLENExpr) {
11626         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11627         ExprLoc = VLENExpr->getExprLoc();
11628       }
11629       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11630       if (CGM.getTriple().isX86()) {
11631         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11632       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11633         unsigned VLEN = VLENVal.getExtValue();
11634         StringRef MangledName = Fn->getName();
11635         if (CGM.getTarget().hasFeature("sve"))
11636           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11637                                          MangledName, 's', 128, Fn, ExprLoc);
11638         if (CGM.getTarget().hasFeature("neon"))
11639           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11640                                          MangledName, 'n', 128, Fn, ExprLoc);
11641       }
11642     }
11643     FD = FD->getPreviousDecl();
11644   }
11645 }
11646 
11647 namespace {
11648 /// Cleanup action for doacross support.
11649 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11650 public:
11651   static const int DoacrossFinArgs = 2;
11652 
11653 private:
11654   llvm::FunctionCallee RTLFn;
11655   llvm::Value *Args[DoacrossFinArgs];
11656 
11657 public:
11658   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11659                     ArrayRef<llvm::Value *> CallArgs)
11660       : RTLFn(RTLFn) {
11661     assert(CallArgs.size() == DoacrossFinArgs);
11662     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11663   }
11664   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11665     if (!CGF.HaveInsertPoint())
11666       return;
11667     CGF.EmitRuntimeCall(RTLFn, Args);
11668   }
11669 };
11670 } // namespace
11671 
/// Emit the initialization for a doacross loop nest: builds (once) the
/// kmp_dim record type, fills a local array of per-dimension bounds, calls
/// __kmpc_doacross_init, and pushes a cleanup that emits
/// __kmpc_doacross_fini on scope exit.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // Lazily build and cache the kmp_dim record type on first use.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  // One kmp_dim entry per collapsed loop dimension.
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  // Zero-initialize the dims array: 'lo' stays 0 for every dimension.
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    // Convert the iteration count to kmp_int64.
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn =
      createRuntimeFunction(OMPRTL__kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Schedule the matching __kmpc_doacross_fini call (located at the
  // directive's end location) for both normal and EH exits.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn =
      createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
11742 
11743 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11744                                           const OMPDependClause *C) {
11745   QualType Int64Ty =
11746       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11747   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11748   QualType ArrayTy = CGM.getContext().getConstantArrayType(
11749       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
11750   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11751   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11752     const Expr *CounterVal = C->getLoopData(I);
11753     assert(CounterVal);
11754     llvm::Value *CntVal = CGF.EmitScalarConversion(
11755         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11756         CounterVal->getExprLoc());
11757     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11758                           /*Volatile=*/false, Int64Ty);
11759   }
11760   llvm::Value *Args[] = {
11761       emitUpdateLocation(CGF, C->getBeginLoc()),
11762       getThreadID(CGF, C->getBeginLoc()),
11763       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
11764   llvm::FunctionCallee RTLFn;
11765   if (C->getDependencyKind() == OMPC_DEPEND_source) {
11766     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
11767   } else {
11768     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
11769     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
11770   }
11771   CGF.EmitRuntimeCall(RTLFn, Args);
11772 }
11773 
11774 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11775                                llvm::FunctionCallee Callee,
11776                                ArrayRef<llvm::Value *> Args) const {
11777   assert(Loc.isValid() && "Outlined function call location must be valid.");
11778   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11779 
11780   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11781     if (Fn->doesNotThrow()) {
11782       CGF.EmitNounwindRuntimeCall(Fn, Args);
11783       return;
11784     }
11785   }
11786   CGF.EmitRuntimeCall(Callee, Args);
11787 }
11788 
/// Emit a call to an outlined OpenMP region function. The base
/// implementation simply forwards to emitCall, which attaches an artificial
/// debug location and handles nounwind callees.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
11794 
11795 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11796   if (const auto *FD = dyn_cast<FunctionDecl>(D))
11797     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11798       HasEmittedDeclareTargetRegion = true;
11799 }
11800 
/// Return the address to use for \p NativeParam inside the outlined
/// function. In this base implementation the native parameter is used
/// directly and \p TargetParam is ignored; presumably device-specific
/// runtimes override this to translate between the two — TODO confirm
/// against the subclasses.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
11806 
11807 namespace {
11808 /// Cleanup action for allocate support.
11809 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
11810 public:
11811   static const int CleanupArgs = 3;
11812 
11813 private:
11814   llvm::FunctionCallee RTLFn;
11815   llvm::Value *Args[CleanupArgs];
11816 
11817 public:
11818   OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
11819                        ArrayRef<llvm::Value *> CallArgs)
11820       : RTLFn(RTLFn) {
11821     assert(CallArgs.size() == CleanupArgs &&
11822            "Size of arguments does not match.");
11823     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11824   }
11825   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11826     if (!CGF.HaveInsertPoint())
11827       return;
11828     CGF.EmitRuntimeCall(RTLFn, Args);
11829   }
11830 };
11831 } // namespace
11832 
/// Return the address of a local variable that carries an 'omp allocate'
/// attribute, emitting a __kmpc_alloc call (plus a __kmpc_free cleanup) for
/// it. Returns Address::invalid() when the variable should use the default
/// stack allocation instead.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return Address::invalid();
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  if ((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
       AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
      !AA->getAllocator())
    return Address::invalid();
  llvm::Value *Size;
  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
  if (CVD->getType()->isVariablyModifiedType()) {
    // VLA-like types: size is only known at runtime.
    Size = CGF.getTypeSize(CVD->getType());
    // Align the size: ((size + align - 1) / align) * align
    Size = CGF.Builder.CreateNUWAdd(
        Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
  } else {
    // Fixed-size types: round the static size up to the alignment.
    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
    Size = CGM.getSize(Sz.alignTo(Align));
  }
  llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
  assert(AA->getAllocator() &&
         "Expected allocator expression for non-default allocator.");
  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
  // According to the standard, the original allocator type is a enum (integer).
  // Convert to pointer type, if required.
  if (Allocator->getType()->isIntegerTy())
    Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
  else if (Allocator->getType()->isPointerTy())
    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
                                                                CGM.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  // void *addr = __kmpc_alloc(gtid, size, allocator);
  llvm::Value *Addr =
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args,
                          getName({CVD->getName(), ".void.addr"}));
  // Schedule __kmpc_free(gtid, addr, allocator) for scope exit.
  llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
                                                              Allocator};
  llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);

  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                                llvm::makeArrayRef(FiniArgs));
  // Cast the raw void* result to a pointer to the variable's type.
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr,
      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
      getName({CVD->getName(), ".addr"}));
  return Address(Addr, Align);
}
11887 
11888 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11889     CodeGenModule &CGM, const OMPLoopDirective &S)
11890     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11891   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11892   if (!NeedToPush)
11893     return;
11894   NontemporalDeclsSet &DS =
11895       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11896   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11897     for (const Stmt *Ref : C->private_refs()) {
11898       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11899       const ValueDecl *VD;
11900       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11901         VD = DRE->getDecl();
11902       } else {
11903         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11904         assert((ME->isImplicitCXXThis() ||
11905                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11906                "Expected member of current class.");
11907         VD = ME->getMemberDecl();
11908       }
11909       DS.insert(VD);
11910     }
11911   }
11912 }
11913 
11914 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11915   if (!NeedToPush)
11916     return;
11917   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11918 }
11919 
11920 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11921   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11922 
11923   return llvm::any_of(
11924       CGM.getOpenMPRuntime().NontemporalDeclsStack,
11925       [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
11926 }
11927 
11928 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11929     const OMPExecutableDirective &S,
11930     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11931     const {
11932   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11933   // Vars in target/task regions must be excluded completely.
11934   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11935       isOpenMPTaskingDirective(S.getDirectiveKind())) {
11936     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11937     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11938     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11939     for (const CapturedStmt::Capture &Cap : CS->captures()) {
11940       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11941         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11942     }
11943   }
11944   // Exclude vars in private clauses.
11945   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11946     for (const Expr *Ref : C->varlists()) {
11947       if (!Ref->getType()->isScalarType())
11948         continue;
11949       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11950       if (!DRE)
11951         continue;
11952       NeedToCheckForLPCs.insert(DRE->getDecl());
11953     }
11954   }
11955   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11956     for (const Expr *Ref : C->varlists()) {
11957       if (!Ref->getType()->isScalarType())
11958         continue;
11959       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11960       if (!DRE)
11961         continue;
11962       NeedToCheckForLPCs.insert(DRE->getDecl());
11963     }
11964   }
11965   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11966     for (const Expr *Ref : C->varlists()) {
11967       if (!Ref->getType()->isScalarType())
11968         continue;
11969       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11970       if (!DRE)
11971         continue;
11972       NeedToCheckForLPCs.insert(DRE->getDecl());
11973     }
11974   }
11975   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11976     for (const Expr *Ref : C->varlists()) {
11977       if (!Ref->getType()->isScalarType())
11978         continue;
11979       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11980       if (!DRE)
11981         continue;
11982       NeedToCheckForLPCs.insert(DRE->getDecl());
11983     }
11984   }
11985   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11986     for (const Expr *Ref : C->varlists()) {
11987       if (!Ref->getType()->isScalarType())
11988         continue;
11989       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11990       if (!DRE)
11991         continue;
11992       NeedToCheckForLPCs.insert(DRE->getDecl());
11993     }
11994   }
11995   for (const Decl *VD : NeedToCheckForLPCs) {
11996     for (const LastprivateConditionalData &Data :
11997          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
11998       if (Data.DeclToUniqueName.count(VD) > 0) {
11999         if (!Data.Disabled)
12000           NeedToAddForLPCsAsDisabled.insert(VD);
12001         break;
12002       }
12003     }
12004   }
12005 }
12006 
/// "Push" form of the RAII: if the directive has at least one
/// lastprivate(conditional:) clause (an OpenMP 5.0 feature), push a new
/// enabled entry onto LastprivateConditionalStack mapping each listed
/// declaration to a unique global name; the destructor pops it.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    // Each conditional lastprivate variable gets a unique "pl_cond" name used
    // later to create/find the internal globals holding its last value.
    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  // Remember the loop iteration variable and the function the entry belongs
  // to; both are consulted when an update to one of the vars is detected.
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}
12038 
/// "Disable" form of the RAII (used via disable()): if any declarations
/// referenced by \p S shadow an active lastprivate conditional entry, push a
/// Disabled entry so inner-region analysis skips them.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  // Lastprivate conditional is an OpenMP 5.0 feature.
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    // Push a Disabled entry; unique names are irrelevant here, only the
    // declaration keys are consulted during lookups.
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}
12057 
/// Named factory for the "disable analysis in inner region" form of the RAII;
/// simply forwards to the two-argument constructor.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
12063 
12064 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12065   if (CGM.getLangOpts().OpenMP < 50)
12066     return;
12067   if (Action == ActionToDo::DisableLastprivateConditional) {
12068     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12069            "Expected list of disabled private vars.");
12070     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12071   }
12072   if (Action == ActionToDo::PushAsLastprivateConditional) {
12073     assert(
12074         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12075         "Expected list of lastprivate conditional vars.");
12076     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12077   }
12078 }
12079 
/// Allocates (or reuses) the per-function private storage for a lastprivate
/// conditional variable \p VD: an implicit record { value, char Fired }.
/// Resets the Fired flag to 0 and returns the address of the value field,
/// which becomes the private copy of \p VD in the region.
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  // Per-function cache of (type, value field, Fired field, base lvalue)
  // tuples, keyed by the variable declaration.
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // First use in this function: build the record type and a stack temp.
    // (Record name kept as-is; it only names the implicit struct in the IR.)
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    // Already materialized earlier in this function: unpack the cached tuple.
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // priv_a.Fired = 0; -- no update has been observed yet.
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}
12114 
namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
/// Walks glvalue subexpressions looking for a DeclRefExpr or implicit-this
/// MemberExpr whose declaration appears in any active (innermost-first)
/// LastprivateConditionalData entry; records what it found for retrieval via
/// getFoundData().
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  // Active stack of lastprivate conditional regions (outermost first).
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  // Results of a successful search; null/empty until a match is found.
  const Expr *FoundE = nullptr;
  const Decl *FoundD = nullptr;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn = nullptr;
  SourceLocation Loc;

public:
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    // Search innermost regions first; a Disabled entry shadows outer ones.
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitMemberExpr(const MemberExpr *E) {
    // Only members of the current class (implicit or explicit 'this') can be
    // lastprivate conditional.
    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitStmt(const Stmt *S) {
    // Generic traversal: only glvalue children can name the variable itself.
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  /// Returns (expr, canonical decl, unique name, IV lvalue, owning function)
  /// of the match; members are default-constructed if nothing was found.
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
  }
};
} // namespace
12185 
/// Emits the guarded copy-out for a lastprivate conditional update: inside a
/// critical section (unless SIMD-only), compare the saved iteration counter
/// with the current one and, if not newer, store the current iteration and
/// private value into internal globals named after \p UniqueDeclName.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    // Serialize competing updates from different threads via a critical
    // region keyed by the variable's unique name.
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
12272 
12273 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12274                                                          const Expr *LHS) {
12275   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12276     return;
12277   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12278   if (!Checker.Visit(LHS))
12279     return;
12280   const Expr *FoundE;
12281   const Decl *FoundD;
12282   StringRef UniqueDeclName;
12283   LValue IVLVal;
12284   llvm::Function *FoundFn;
12285   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12286       Checker.getFoundData();
12287   if (FoundFn != CGF.CurFn) {
12288     // Special codegen for inner parallel regions.
12289     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12290     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12291     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12292            "Lastprivate conditional is not found in outer region.");
12293     QualType StructTy = std::get<0>(It->getSecond());
12294     const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
12295     LValue PrivLVal = CGF.EmitLValue(FoundE);
12296     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12297         PrivLVal.getAddress(CGF),
12298         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
12299     LValue BaseLVal =
12300         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12301     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12302     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12303                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12304                         FiredLVal, llvm::AtomicOrdering::Unordered,
12305                         /*IsVolatile=*/true, /*isInit=*/false);
12306     return;
12307   }
12308 
12309   // Private address of the lastprivate conditional in the current context.
12310   // priv_a
12311   LValue LVal = CGF.EmitLValue(FoundE);
12312   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12313                                    FoundE->getExprLoc());
12314 }
12315 
12316 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12317     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12318     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12319   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12320     return;
12321   auto Range = llvm::reverse(LastprivateConditionalStack);
12322   auto It = llvm::find_if(
12323       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12324   if (It == Range.end() || It->Fn != CGF.CurFn)
12325     return;
12326   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12327   assert(LPCI != LastprivateConditionalToTypes.end() &&
12328          "Lastprivates must be registered already.");
12329   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12330   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12331   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12332   for (const auto &Pair : It->DeclToUniqueName) {
12333     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12334     if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
12335       continue;
12336     auto I = LPCI->getSecond().find(Pair.first);
12337     assert(I != LPCI->getSecond().end() &&
12338            "Lastprivate must be rehistered already.");
12339     // bool Cmp = priv_a.Fired != 0;
12340     LValue BaseLVal = std::get<3>(I->getSecond());
12341     LValue FiredLVal =
12342         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12343     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12344     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12345     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12346     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12347     // if (Cmp) {
12348     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12349     CGF.EmitBlock(ThenBB);
12350     Address Addr = CGF.GetAddrOfLocalVar(VD);
12351     LValue LVal;
12352     if (VD->getType()->isReferenceType())
12353       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12354                                            AlignmentSource::Decl);
12355     else
12356       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12357                                 AlignmentSource::Decl);
12358     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12359                                      D.getBeginLoc());
12360     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12361     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12362     // }
12363   }
12364 }
12365 
12366 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12367     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12368     SourceLocation Loc) {
12369   if (CGF.getLangOpts().OpenMP < 50)
12370     return;
12371   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12372   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12373          "Unknown lastprivate conditional variable.");
12374   StringRef UniqueName = It->second;
12375   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12376   // The variable was not updated in the region - exit.
12377   if (!GV)
12378     return;
12379   LValue LPLVal = CGF.MakeAddrLValue(
12380       GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
12381   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12382   CGF.EmitStoreOfScalar(Res, PrivLVal);
12383 }
12384 
// SIMD-only mode never emits runtime calls; reaching this is a compiler bug.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12390 
// SIMD-only mode never emits runtime calls; reaching this is a compiler bug.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12396 
// SIMD-only mode never emits runtime calls; reaching this is a compiler bug.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12404 
// SIMD-only mode never emits runtime calls; reaching this is a compiler bug.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12412 
// SIMD-only mode never emits runtime calls; reaching this is a compiler bug.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12419 
// SIMD-only mode never emits runtime calls; reaching this is a compiler bug.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12425 
// SIMD-only mode never emits runtime calls; reaching this is a compiler bug.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12430 
// SIMD-only mode never emits runtime calls; reaching this is a compiler bug.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12436 
// SIMD-only mode never emits runtime calls; reaching this is a compiler bug.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12444 
// SIMD-only mode never emits runtime calls; reaching this is a compiler bug.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12451 
// SIMD-only mode never emits runtime calls; reaching this is a compiler bug.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12459 
// SIMD-only mode never emits runtime calls; reaching this is a compiler bug.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12466 
// SIMD-only mode never emits runtime calls; reaching this is a compiler bug.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12472 
// SIMD-only mode never emits runtime calls; reaching this is a compiler bug.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12478 
// SIMD-only mode never emits runtime calls; reaching this is a compiler bug.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12485 
// SIMD-only mode never emits runtime calls; reaching this is a compiler bug.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12491 
// SIMD-only mode never emits runtime calls; reaching this is a compiler bug.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12499 
// SIMD-only mode never emits runtime calls; reaching this is a compiler bug.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12505 
// SIMD-only mode never emits runtime calls; reaching this is a compiler bug.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12511 
// SIMD-only mode never emits runtime calls; reaching this is a compiler bug.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12518 
// SIMD-only mode never emits runtime calls; reaching this is a compiler bug.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12524 
// SIMD-only mode never emits runtime calls; reaching this is a compiler bug.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12529 
// SIMD-only mode never emits runtime calls; reaching this is a compiler bug.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12536 
// SIMD-only mode never emits runtime calls; reaching this is a compiler bug.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12545 
// SIMD-only mode never emits runtime calls; reaching this is a compiler bug.
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12552 
// Only "simple" reductions (which require no runtime support) are expected in
// SIMD-only mode; forward those to the default implementation.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
12561 
// SIMD-only mode never emits runtime calls; reaching this is a compiler bug.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12567 
// SIMD-only mode never emits runtime calls; reaching this is a compiler bug.
void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12573 
// SIMD-only mode never emits runtime calls; reaching this is a compiler bug.
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12580 
// SIMD-only mode never emits runtime calls; reaching this is a compiler bug.
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12587 
// SIMD-only mode never emits runtime calls; reaching this is a compiler bug.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12592 
// SIMD-only mode never emits runtime calls; reaching this is a compiler bug.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12598 
// SIMD-only mode never emits runtime calls; reaching this is a compiler bug.
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12604 
// SIMD-only mode never emits runtime calls; reaching this is a compiler bug.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12611 
// SIMD-only mode never emits runtime calls; reaching this is a compiler bug.
void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12621 
// SIMD-only mode never emits runtime calls; reaching this is a compiler bug.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12625 
// SIMD-only mode never emits runtime calls; reaching this is a compiler bug.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12629 
// Deliberately returns false (rather than trapping like the other stubs):
// this runtime never claims a global for target-specific handling, which
// presumably lets the caller fall through to ordinary host emission —
// confirm against CodeGenModule's use of this hook.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
12633 
// Stub override: launching a 'teams' region through an outlined function
// requires runtime-library support absent in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12641 
// Stub override: the 'num_teams'/'thread_limit' clauses translate to runtime
// calls that do not exist in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12648 
// Stub override: 'target data' mapping (begin/end runtime calls around the
// region body) is offloading functionality unavailable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12654 
// Stub override: stand-alone data-motion directives ('target enter/exit data',
// 'target update') require the offloading runtime, absent in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12660 
// Stub override: doacross ('ordered' with cross-iteration dependences)
// initialization maps to runtime calls unavailable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12666 
// Stub override: per-iteration doacross wait/post (driven by the 'depend'
// clause) needs runtime support not available in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12671 
// Stub override: parameter translation between a captured field and its
// native declaration is only needed for device outlining, which SIMD-only
// mode does not perform.
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12677 
// Stub override: companion to translateParameter — mapping a translated
// target parameter back to an address is device-outlining functionality
// that SIMD-only mode does not support.
Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12684