1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGCXXABI.h"
14 #include "CGCleanup.h"
15 #include "CGOpenMPRuntime.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/CodeGen/ConstantInitBuilder.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "clang/Basic/BitmaskEnum.h"
22 #include "llvm/ADT/ArrayRef.h"
23 #include "llvm/Bitcode/BitcodeReader.h"
24 #include "llvm/IR/DerivedTypes.h"
25 #include "llvm/IR/GlobalValue.h"
26 #include "llvm/IR/Value.h"
27 #include "llvm/Support/Format.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include <cassert>
30 
31 using namespace clang;
32 using namespace CodeGen;
33 
34 namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Constructor for regions that carry their own captured statement.
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Constructor for regions without a captured statement (e.g. inlined
  /// regions that reuse the enclosing function's captures).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit a task-switching point for untied tasks. No-op by default;
  /// overridden by task regions and forwarded by inlined regions.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// Kind of this region (parallel/task/inlined/target).
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// OpenMP directive that created this region.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// True if a 'cancel' directive may terminate this region.
  bool hasCancel() const { return HasCancel; }

  /// LLVM-style RTTI: any capture info tagged CR_OpenMP is one of ours.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};
96 
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param ThreadIDVar Parameter of the outlined function holding the
  /// global thread id; must not be null.
  /// \param HelperName Name to use for the generated outlined function.
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// LLVM-style RTTI: matches only parallel-outlined OpenMP regions.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name used for the outlined helper function (lifetime owned by caller).
  StringRef HelperName;
};
129 
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action implementing the part-switching machinery for untied
  /// tasks: an untied task may suspend and be re-entered, each time resuming
  /// at the part recorded in the part-id variable.
  class UntiedTaskActionTy final : public PrePostActionTy {
    /// True for untied tasks (note: the constructor takes 'Tied' and
    /// stores its negation).
    bool Untied;
    /// Variable holding the current part id of the task.
    const VarDecl *PartIDVar;
    /// Codegen sequence executed at every task switching point.
    const RegionCodeGenTy UntiedCodeGen;
    /// Dispatch switch over the part id; created in Enter() for untied
    /// tasks only, otherwise stays null.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        // Switch on the stored part id; the default destination simply
        // leaves the task entry through the cleanup path.
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        // Part id 0 resumes at the very beginning of the task body.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        // Record the id of the next part so a later re-entry of the task
        // dispatches to the resume block registered below.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        // Suspend: exit the task entry through cleanups...
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        // ...and add the resume block as the next case of the switch.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of task parts emitted so far. NOTE(review): dereferences
    /// UntiedSwitch, so this is only valid after Enter() created the switch
    /// (i.e. for untied tasks) — callers presumably guarantee that.
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  /// Forward task-switching to the shared untied-task action.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
218 
219 /// API for inlined captured statement code generation in OpenMP
220 /// constructs.
221 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
222 public:
223   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
224                             const RegionCodeGenTy &CodeGen,
225                             OpenMPDirectiveKind Kind, bool HasCancel)
226       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
227         OldCSI(OldCSI),
228         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
229 
230   // Retrieve the value of the context parameter.
231   llvm::Value *getContextValue() const override {
232     if (OuterRegionInfo)
233       return OuterRegionInfo->getContextValue();
234     llvm_unreachable("No context value for inlined OpenMP region");
235   }
236 
237   void setContextValue(llvm::Value *V) override {
238     if (OuterRegionInfo) {
239       OuterRegionInfo->setContextValue(V);
240       return;
241     }
242     llvm_unreachable("No context value for inlined OpenMP region");
243   }
244 
245   /// Lookup the captured field decl for a variable.
246   const FieldDecl *lookup(const VarDecl *VD) const override {
247     if (OuterRegionInfo)
248       return OuterRegionInfo->lookup(VD);
249     // If there is no outer outlined region,no need to lookup in a list of
250     // captured variables, we can use the original one.
251     return nullptr;
252   }
253 
254   FieldDecl *getThisFieldDecl() const override {
255     if (OuterRegionInfo)
256       return OuterRegionInfo->getThisFieldDecl();
257     return nullptr;
258   }
259 
260   /// Get a variable or parameter for storing global thread id
261   /// inside OpenMP construct.
262   const VarDecl *getThreadIDVariable() const override {
263     if (OuterRegionInfo)
264       return OuterRegionInfo->getThreadIDVariable();
265     return nullptr;
266   }
267 
268   /// Get an LValue for the current ThreadID variable.
269   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
270     if (OuterRegionInfo)
271       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
272     llvm_unreachable("No LValue for inlined OpenMP construct");
273   }
274 
275   /// Get the name of the capture helper.
276   StringRef getHelperName() const override {
277     if (auto *OuterRegionInfo = getOldCSI())
278       return OuterRegionInfo->getHelperName();
279     llvm_unreachable("No helper name for inlined OpenMP construct");
280   }
281 
282   void emitUntiedSwitch(CodeGenFunction &CGF) override {
283     if (OuterRegionInfo)
284       OuterRegionInfo->emitUntiedSwitch(CGF);
285   }
286 
287   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
288 
289   static bool classof(const CGCapturedStmtInfo *Info) {
290     return CGOpenMPRegionInfo::classof(Info) &&
291            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
292   }
293 
294   ~CGOpenMPInlinedRegionInfo() override = default;
295 
296 private:
297   /// CodeGen info about outer OpenMP region.
298   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
299   CGOpenMPRegionInfo *OuterRegionInfo;
300 };
301 
/// API for captured statement code generation in OpenMP target
/// constructs. For this captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param HelperName Unique, client-provided name for the target region.
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// LLVM-style RTTI: matches only target OpenMP regions.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Unique name of the target region (lifetime owned by the client).
  StringRef HelperName;
};
330 
/// Placeholder codegen callback for regions used only to evaluate
/// expressions; must never actually be invoked.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
334 /// API for generation of expressions captured in a innermost OpenMP
335 /// region.
336 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
337 public:
338   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
339       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
340                                   OMPD_unknown,
341                                   /*HasCancel=*/false),
342         PrivScope(CGF) {
343     // Make sure the globals captured in the provided statement are local by
344     // using the privatization logic. We assume the same variable is not
345     // captured more than once.
346     for (const auto &C : CS.captures()) {
347       if (!C.capturesVariable() && !C.capturesVariableByCopy())
348         continue;
349 
350       const VarDecl *VD = C.getCapturedVar();
351       if (VD->isLocalVarDeclOrParm())
352         continue;
353 
354       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
355                       /*RefersToEnclosingVariableOrCapture=*/false,
356                       VD->getType().getNonReferenceType(), VK_LValue,
357                       C.getLocation());
358       PrivScope.addPrivate(
359           VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); });
360     }
361     (void)PrivScope.Privatize();
362   }
363 
364   /// Lookup the captured field decl for a variable.
365   const FieldDecl *lookup(const VarDecl *VD) const override {
366     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
367       return FD;
368     return nullptr;
369   }
370 
371   /// Emit the captured statement body.
372   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
373     llvm_unreachable("No body for expressions");
374   }
375 
376   /// Get a variable or parameter for storing global thread id
377   /// inside OpenMP construct.
378   const VarDecl *getThreadIDVariable() const override {
379     llvm_unreachable("No thread id for expressions");
380   }
381 
382   /// Get the name of the capture helper.
383   StringRef getHelperName() const override {
384     llvm_unreachable("No helper name for expressions");
385   }
386 
387   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
388 
389 private:
390   /// Private scope to capture global variables.
391   CodeGenFunction::OMPPrivateScope PrivScope;
392 };
393 
/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  /// Saved lambda capture map of the enclosing function (restored on exit).
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  /// Saved 'this' capture field of an enclosing lambda, if any.
  FieldDecl *LambdaThisCaptureField = nullptr;
  /// Saved block info of an enclosing block literal, if any.
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel)
      : CGF(CGF) {
    // Start emission for the construct. The new info is heap-allocated,
    // chains to the previous CapturedStmtInfo, and is freed in ~RAII.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    // Stash lambda/block capture state so the inlined region does not
    // resolve captures against the enclosing lambda or block.
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    LambdaThisCaptureField = CGF.LambdaThisCaptureField;
    CGF.LambdaThisCaptureField = nullptr;
    BlockInfo = CGF.BlockInfo;
    CGF.BlockInfo = nullptr;
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    // Restore the lambda/block capture state saved in the constructor.
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    CGF.LambdaThisCaptureField = LambdaThisCaptureField;
    CGF.BlockInfo = BlockInfo;
  }
};
430 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumeric elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive. Deliberately the same value as
  /// OMP_IDENT_BARRIER_IMPL (0x40), matching kmp.h.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive (includes the IMPL bit 0x40).
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive (includes the IMPL bit 0x40).
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
459 
// NOTE(review): this anonymous namespace is nested inside the one opened
// above; it exists to scope the bitmask-enum operator injection below.
namespace {
// Enables |, &, ^ etc. for LLVM_MARK_AS_BITMASK_ENUM enums in this namespace.
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};
} // anonymous namespace
479 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
/// Indices of the fields of ident_t, used when building GEPs into the
/// runtime's location structure.
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
520 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h). Values must stay in sync with the runtime.
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions. Each ordered value below is its
  /// unordered counterpart plus 32.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present (bit 29).
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present (bit 30).
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
552 
/// Identifiers of the OpenMP runtime library (libomp/libomptarget) entry
/// points that codegen may emit calls to; each enumerator documents the C
/// prototype of the corresponding runtime function.
enum OpenMPRTLFunction {
  /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call,
  /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// Call to void __kmpc_threadprivate_register( ident_t *,
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  // Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
  // global_tid, kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_cancel_barrier,
  // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_for_static_fini,
  // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_serialized_parallel,
  // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  // Call to void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
  // int end_part);
  OMPRTL__kmpc_omp_taskyield,
  // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
  // new_task);
  OMPRTL__kmpc_omp_task,
  // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
  // kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
  OMPRTL__kmpc_reduce,
  // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  // *lck);
  OMPRTL__kmpc_reduce_nowait,
  // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_ordered,
  // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_omp_taskwait,
  // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_taskgroup,
  // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_taskgroup,
  // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
  // int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
  // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
  // microtask, ...);
  OMPRTL__kmpc_fork_teams,
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,
  // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
  // num_dims, struct kmp_dim *dims);
  OMPRTL__kmpc_doacross_init,
  // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
  OMPRTL__kmpc_doacross_fini,
  // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_post,
  // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_wait,
  // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  OMPRTL__kmpc_task_reduction_init,
  // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  OMPRTL__kmpc_task_reduction_get_th_data,
  // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
  OMPRTL__kmpc_alloc,
  // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
  OMPRTL__kmpc_free,

  //
  // Offloading related calls
  //
  // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
  // size);
  OMPRTL__kmpc_push_target_tripcount,
  // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target,
  // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_nowait,
  // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams,
  // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
  // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t
  // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams_nowait,
  // Call to void __tgt_register_requires(int64_t flags);
  OMPRTL__tgt_register_requires,
  // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_register_lib,
  // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_unregister_lib,
  // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_begin,
  // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_begin_nowait,
  // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_end,
  // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_end_nowait,
  // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_update,
  // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_update_nowait,
};
745 
746 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
747 /// region.
748 class CleanupTy final : public EHScopeStack::Cleanup {
749   PrePostActionTy *Action;
750 
751 public:
752   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
753   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
754     if (!CGF.HaveInsertPoint())
755       return;
756     Action->Exit(CGF);
757   }
758 };
759 
760 } // anonymous namespace
761 
762 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
763   CodeGenFunction::RunCleanupsScope Scope(CGF);
764   if (PrePostAction) {
765     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
766     Callback(CodeGen, CGF, *PrePostAction);
767   } else {
768     PrePostActionTy Action;
769     Callback(CodeGen, CGF, Action);
770   }
771 }
772 
773 /// Check if the combiner is a call to UDR combiner and if it is so return the
774 /// UDR decl used for reduction.
775 static const OMPDeclareReductionDecl *
776 getReductionInit(const Expr *ReductionOp) {
777   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
778     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
779       if (const auto *DRE =
780               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
781         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
782           return DRD;
783   return nullptr;
784 }
785 
/// Initialize a reduction private copy via a user-defined reduction (UDR).
/// If \p DRD declares an explicit initializer, emit the initializer call
/// \p InitOp with its first argument privatized to \p Private and its second
/// to \p Original. Otherwise, store the null value of \p Ty into \p Private.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // Fetch the emitted <combiner, initializer> pair for this UDR; the
    // second element is the initializer function used below.
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    // Both call arguments are expected to be unary operators wrapping a
    // DeclRefExpr naming the variables to be redirected.
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Remap the two referenced variables onto the private/original storage
    // for the duration of the initializer call.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // Bind the opaque callee to the emitted initializer function, then emit
    // the call purely for its side effects.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No explicit initializer: materialize a private constant global holding
    // the null value of Ty and copy it into the private storage.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    // Load the constant in a form matching Ty's evaluation kind.
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress());
      break;
    }
    // Wrap the loaded rvalue in an opaque expression so that generic
    // expression-to-memory emission can store it into Private.
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
837 
838 /// Emit initialization of arrays of complex types.
839 /// \param DestAddr Address of the array.
840 /// \param Type Type of array.
841 /// \param Init Initial expression of array.
842 /// \param SrcAddr Address of the original array.
/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, each element is initialized via
/// the user-defined reduction initializer \p Init (with \p DRD); otherwise
/// \p Init is emitted directly into each element.
/// \param Init Initial expression of array.
/// \param DRD Declare-reduction declaration driving element init, or null.
/// \param SrcAddr Address of the original array (only used when \p DRD is
/// non-null, as the "original" operand of the UDR initializer).
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source/destination element across iterations; the
  // back-edge incoming values are added after the body is emitted below.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Scope so per-element cleanups run before advancing to the next element.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the IR value name says "dest.element" but this advances
    // the *source* pointer — name looks copy-pasted; behavior is correct.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // Close the loop: the back-edge incoming value for the destination PHI.
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
926 
927 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
928   return CGF.EmitOMPSharedLValue(E);
929 }
930 
931 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
932                                             const Expr *E) {
933   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
934     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
935   return LValue();
936 }
937 
/// Emit element-wise initialization of the array-typed private copy for
/// reduction item \p N, either from the UDR initializer or from the private
/// variable's own initializer expression.
void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  // Use the UDR path when a declare-reduction is present and either provides
  // an explicit initializer or the private copy has no initializer of its own.
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedLVal.getAddress());
}
954 
955 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
956                                    ArrayRef<const Expr *> Privates,
957                                    ArrayRef<const Expr *> ReductionOps) {
958   ClausesData.reserve(Shareds.size());
959   SharedAddresses.reserve(Shareds.size());
960   Sizes.reserve(Shareds.size());
961   BaseDecls.reserve(Shareds.size());
962   auto IPriv = Privates.begin();
963   auto IRed = ReductionOps.begin();
964   for (const Expr *Ref : Shareds) {
965     ClausesData.emplace_back(Ref, *IPriv, *IRed);
966     std::advance(IPriv, 1);
967     std::advance(IRed, 1);
968   }
969 }
970 
971 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
972   assert(SharedAddresses.size() == N &&
973          "Number of generated lvalues must be exactly N.");
974   LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
975   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
976   SharedAddresses.emplace_back(First, Second);
977 }
978 
/// Compute and record the size of reduction item \p N. For non-variably
/// modified types a constant byte size (and a null element count) is stored;
/// for VLAs / array sections both the byte size and element count are
/// computed at runtime and the VLA size expression is bound so the variably
/// modified type can be emitted.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Fixed-size item: the byte size is a compile-time-known value and no
    // separate element count is needed.
    Sizes.emplace_back(
        CGF.getTypeSize(
            SharedAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count = (upper bound - begin) + 1; byte size follows from the
    // element size.
    Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(),
                                     SharedAddresses[N].first.getPointer());
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // VLA: byte size comes from the type; element count is derived by exact
    // division by the element size.
    SizeInChars = CGF.getTypeSize(
        SharedAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the VLA's size expression to the computed element count so that
  // emitting the variably modified type below sees the right dimension.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
1016 
/// Re-emit the variably modified type of reduction item \p N using a
/// previously computed element count \p Size (e.g. in a different function
/// than the one where the size was first computed). No-op for fixed-size
/// items, where \p Size must be null.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // Bind the VLA size expression to the supplied count before emitting the
  // variably modified type.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
1035 
/// Emit the initial value of the private copy for reduction item \p N.
/// Chooses, in order: array-wise init for array types, the UDR initializer
/// when applicable, then \p DefaultInit, and finally the private variable's
/// own non-trivial initializer.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Recast both addresses to the memory representation of their types before
  // emitting any stores.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Array-typed items are initialized element by element.
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // UDR with an explicit initializer, or a UDR item whose private copy has
    // no initializer of its own.
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // Fall back to the private variable's own initializer expression when
    // the caller-provided default init declined and the init is non-trivial.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
1066 
1067 bool ReductionCodeGen::needCleanups(unsigned N) {
1068   const auto *PrivateVD =
1069       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1070   QualType PrivateType = PrivateVD->getType();
1071   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1072   return DTorKind != QualType::DK_none;
1073 }
1074 
1075 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
1076                                     Address PrivateAddr) {
1077   const auto *PrivateVD =
1078       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1079   QualType PrivateType = PrivateVD->getType();
1080   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1081   if (needCleanups(N)) {
1082     PrivateAddr = CGF.Builder.CreateElementBitCast(
1083         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1084     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
1085   }
1086 }
1087 
/// Strip pointer/reference levels from \p BaseLV (of type \p BaseTy) by
/// emitting loads until the type matches \p ElTy, then return an lvalue for
/// the final address recast to the memory type of \p ElTy.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  // Each iteration loads through one pointer or reference level.
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  // Recast the resulting address to ElTy's memory representation, keeping the
  // original lvalue's base and TBAA info.
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
1107 
/// Rebuild the pointer/reference indirection chain of \p BaseTy around
/// \p Addr: one temporary is allocated per pointer/reference level and each
/// outer temporary is made to point at the next inner one, with \p Addr
/// stored at the innermost level. Returns the outermost temporary, or (when
/// no levels were needed) \p Addr itself at \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  // Tmp: innermost temporary so far; TopTmp: previous level (where the next
  // temporary's pointer is stored); MostTopTmp: the outermost temporary,
  // which is what callers ultimately receive.
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  // Cast Addr to the innermost temporary's element type (or the caller's
  // lvalue type when no temporaries were created).
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
1135 
1136 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
1137   const VarDecl *OrigVD = nullptr;
1138   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
1139     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
1140     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
1141       Base = TempOASE->getBase()->IgnoreParenImpCasts();
1142     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1143       Base = TempASE->getBase()->IgnoreParenImpCasts();
1144     DE = cast<DeclRefExpr>(Base);
1145     OrigVD = cast<VarDecl>(DE->getDecl());
1146   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
1147     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
1148     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1149       Base = TempASE->getBase()->IgnoreParenImpCasts();
1150     DE = cast<DeclRefExpr>(Base);
1151     OrigVD = cast<VarDecl>(DE->getDecl());
1152   }
1153   return OrigVD;
1154 }
1155 
/// For reduction items expressed through a base variable (array sections /
/// subscripts), translate \p PrivateAddr so that addressing relative to the
/// original base resolves into the private storage; plain items are returned
/// unchanged. Also records the base declaration for item \p N.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    // Address of the base variable, dereferenced down to the shared item's
    // element type.
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    // Offset (in elements) of the base relative to the shared item's begin;
    // applying it to the private pointer produces an address that plays the
    // role of the base for the private copy.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress().getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    // Wrap the adjusted pointer back into the base variable's original
    // pointer/reference shape.
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress().getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  // No section/subscript: the clause expression itself names the variable.
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1181 
1182 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1183   const OMPDeclareReductionDecl *DRD =
1184       getReductionInit(ClausesData[N].ReductionOp);
1185   return DRD && DRD->getInitializer();
1186 }
1187 
1188 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1189   return CGF.EmitLoadOfPointerLValue(
1190       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1191       getThreadIDVariable()->getType()->castAs<PointerType>());
1192 }
1193 
1194 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
1195   if (!CGF.HaveInsertPoint())
1196     return;
1197   // 1.2.2 OpenMP Language Terminology
1198   // Structured block - An executable statement with a single entry at the
1199   // top and a single exit at the bottom.
1200   // The point of exit cannot be a branch out of the structured block.
1201   // longjmp() and throw() must not violate the entry/exit criteria.
1202   CGF.EHStack.pushTerminate();
1203   CodeGen(CGF);
1204   CGF.EHStack.popTerminate();
1205 }
1206 
1207 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1208     CodeGenFunction &CGF) {
1209   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1210                             getThreadIDVariable()->getType(),
1211                             AlignmentSource::Decl);
1212 }
1213 
1214 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1215                                        QualType FieldTy) {
1216   auto *Field = FieldDecl::Create(
1217       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1218       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1219       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1220   Field->setAccess(AS_public);
1221   DC->addDecl(Field);
1222   return Field;
1223 }
1224 
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OffloadEntriesInfoManager(CGM) {
  // Build the implicit ident_t record used to describe source locations to
  // the OpenMP runtime. Field order below defines the struct layout, so it
  // must not be rearranged.
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("ident_t");
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  RD->startDefinition();
  // reserved_1
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // flags
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_2
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_3
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // psource
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  RD->completeDefinition();
  // Cache both the AST-level and the IR-level representations of ident_t,
  // plus the kmp_critical_name type ([8 x i32]).
  IdentQTy = C.getRecordType(RD);
  IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Pick up offloading entries recorded in the host IR, if any.
  loadOffloadInfoMetadata();
}
1250 
1251 void CGOpenMPRuntime::clear() {
1252   InternalVars.clear();
1253   // Clean non-target variable declarations possibly used only in debug info.
1254   for (const auto &Data : EmittedNonTargetVariables) {
1255     if (!Data.getValue().pointsToAliveValue())
1256       continue;
1257     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1258     if (!GV)
1259       continue;
1260     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1261       continue;
1262     GV->eraseFromParent();
1263   }
1264 }
1265 
1266 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1267   SmallString<128> Buffer;
1268   llvm::raw_svector_ostream OS(Buffer);
1269   StringRef Sep = FirstSeparator;
1270   for (StringRef Part : Parts) {
1271     OS << Sep << Part;
1272     Sep = Separator;
1273   }
1274   return OS.str();
1275 }
1276 
/// Emit the outlined combiner or initializer function for a user-defined
/// reduction: `void .omp_combiner.(Ty *omp_out, Ty *omp_in)` (or the
/// `.omp_initializer.` analogue). \p In and \p Out are remapped onto the
/// dereferenced parameters; \p CombinerInitializer, when non-null, is the
/// expression emitted as the function body.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // Always inline these helpers when optimizing.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  (void)Scope.Privatize();
  // For an initializer without an explicit init expression, emit the output
  // variable's own non-trivial initializer, if any.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1333 
/// Emit (once) the combiner and, if declared, the initializer function for
/// the user-defined reduction \p D, caching them in UDRMap. When emitted
/// inside a function \p CGF, the declaration is also recorded per-function.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  // Already emitted — nothing to do.
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // For call-style initializers the expression is emitted directly; for
    // the other kind the priv variable's own initializer is used instead
    // (hence the nullptr).
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
1359 
1360 std::pair<llvm::Function *, llvm::Function *>
1361 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1362   auto I = UDRMap.find(D);
1363   if (I != UDRMap.end())
1364     return I->second;
1365   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1366   return UDRMap.lookup(D);
1367 }
1368 
/// Outline the captured statement \p CS of a parallel/teams region into a
/// function named via \p OutlinedHelperName, wiring up the region info
/// (thread id variable, innermost directive kind, cancellation support).
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  // Determine whether the directive may contain a 'cancel' construct; each
  // parallel-flavored directive class exposes this through hasCancel().
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  // Install the region info for the duration of the outlining and generate
  // the captured-statement function.
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
}
1398 
1399 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1400     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1401     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1402   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1403   return emitParallelOrTeamsOutlinedFunction(
1404       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1405 }
1406 
1407 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1408     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1409     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1410   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1411   return emitParallelOrTeamsOutlinedFunction(
1412       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1413 }
1414 
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // For untied tasks, code emitted at each scheduling point re-enqueues the
  // task via __kmpc_omp_task, passing the kmp_task_t loaded from TaskTVar so
  // the runtime can resume the task later at the recorded part id.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer()};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
  };
  // Attach the untied-task action to the region code-gen callback before the
  // body is emitted so the part-id bookkeeping wraps the task body.
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  // Taskloop directives capture their region under OMPD_taskloop; everything
  // else handled here uses the plain OMPD_task captured statement.
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Only a standalone 'task' directive can carry a 'cancel' region here.
  const auto *TD = dyn_cast<OMPTaskDirective>(&D);
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind,
                                        TD ? TD->hasCancel() : false, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // The number of parts is only meaningful for untied tasks, where the body
  // was split at scheduling points during emission above.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1451 
1452 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1453                              const RecordDecl *RD, const CGRecordLayout &RL,
1454                              ArrayRef<llvm::Constant *> Data) {
1455   llvm::StructType *StructTy = RL.getLLVMType();
1456   unsigned PrevIdx = 0;
1457   ConstantInitBuilder CIBuilder(CGM);
1458   auto DI = Data.begin();
1459   for (const FieldDecl *FD : RD->fields()) {
1460     unsigned Idx = RL.getLLVMFieldNo(FD);
1461     // Fill the alignment.
1462     for (unsigned I = PrevIdx; I < Idx; ++I)
1463       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1464     PrevIdx = Idx + 1;
1465     Fields.add(*DI);
1466     ++DI;
1467   }
1468 }
1469 
1470 template <class... As>
1471 static llvm::GlobalVariable *
1472 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1473                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1474                    As &&... Args) {
1475   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1476   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1477   ConstantInitBuilder CIBuilder(CGM);
1478   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1479   buildStructValue(Fields, CGM, RD, RL, Data);
1480   return Fields.finishAndCreateGlobal(
1481       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1482       std::forward<As>(Args)...);
1483 }
1484 
1485 template <typename T>
1486 static void
1487 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1488                                          ArrayRef<llvm::Constant *> Data,
1489                                          T &Parent) {
1490   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1491   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1492   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1493   buildStructValue(Fields, CGM, RD, RL, Data);
1494   Fields.finishAndAddTo(Parent);
1495 }
1496 
// Return (creating on first use) the default ident_t global for the given
// flags. Globals are cached in OpenMPDefaultLocMap keyed by the
// (Flags, Reserved2Flags) pair, so each distinct flag combination gets one
// private global per module.
Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  FlagsTy FlagsKey(Flags, Reserved2Flags);
  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
  if (!Entry) {
    if (!DefaultOpenMPPSource) {
      // Initialize default location for psource field of ident_t structure of
      // all ident_t objects. Format is ";file;function;line;column;;".
      // Taken from
      // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
      DefaultOpenMPPSource =
          CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
      DefaultOpenMPPSource =
          llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
    }

    // Field order mirrors ident_t: reserved_1, flags, reserved_2, reserved_3,
    // psource. Only the two flag fields and psource carry real data.
    llvm::Constant *Data[] = {
        llvm::ConstantInt::getNullValue(CGM.Int32Ty),
        llvm::ConstantInt::get(CGM.Int32Ty, Flags),
        llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
        llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
    llvm::GlobalValue *DefaultOpenMPLocation =
        createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
                           llvm::GlobalValue::PrivateLinkage);
    // Unnamed-addr allows identical location globals to be merged.
    DefaultOpenMPLocation->setUnnamedAddr(
        llvm::GlobalValue::UnnamedAddr::Global);

    OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
  }
  return Address(Entry, Align);
}
1529 
// Create the per-function "service" insertion point: a no-op bitcast
// instruction used purely as an anchor where runtime setup code (location
// copies, thread-id calls) is later inserted. If \p AtCurrentPoint is false,
// the anchor is placed right after the function's alloca insertion point so
// the setup code lands in the entry block.
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  // The bitcast of undef is a harmless placeholder instruction; it is erased
  // again in clearLocThreadIdInsertPt().
  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}
1545 
1546 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1547   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1548   if (Elem.second.ServiceInsertPt) {
1549     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1550     Elem.second.ServiceInsertPt = nullptr;
1551     Ptr->eraseFromParent();
1552   }
1553 }
1554 
// Emit (or reuse) an ident_t* describing the source location \p Loc for a
// runtime call. Without debug info (or with an invalid location) this returns
// a shared module-level default ident_t; otherwise it lazily materializes a
// per-function stack copy of the default ident_t and stores a
// ";file;function;line;column;;" string into its psource field.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  Address LocValue = Address::invalid();
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, Align);

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // GetOpenMPThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    // Initialize the temporary at the service insertion point (entry block)
    // by copying from the module-level default ident_t.
    if (!Elem.second.ServiceInsertPt)
      setLocThreadIdInsertPt(CGF);
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGF.getTypeSize(IdentQTy));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
  LValue PSource =
      CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));

  // Location strings are cached per raw source-location encoding so each
  // distinct location gets a single global string.
  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      OS2 << FD->getQualifiedNameAsString();
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}
1615 
// Return the OpenMP global thread id for the current function, caching the
// value in OpenMPLocThreadIDMap. Inside outlined regions the id is loaded
// from the thread-id argument; otherwise __kmpc_global_thread_num is called
// once at the function's service insertion point.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
      !CGF.getLangOpts().CXXExceptions ||
      CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
    if (auto *OMPRegionInfo =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
      if (OMPRegionInfo->getThreadIDVariable()) {
        // Check if this an outlined function with thread id passed as argument.
        LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the call at the service insertion point (entry block) so the cached
  // value dominates all later uses; restore the builder position afterwards.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1666 
1667 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1668   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1669   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1670     clearLocThreadIdInsertPt(CGF);
1671     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1672   }
1673   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1674     for(auto *D : FunctionUDRMap[CGF.CurFn])
1675       UDRMap.erase(D);
1676     FunctionUDRMap.erase(CGF.CurFn);
1677   }
1678 }
1679 
1680 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1681   return IdentTy->getPointerTo();
1682 }
1683 
1684 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1685   if (!Kmpc_MicroTy) {
1686     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1687     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1688                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1689     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1690   }
1691   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1692 }
1693 
1694 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1695   llvm::FunctionCallee RTLFn = nullptr;
1696   switch (static_cast<OpenMPRTLFunction>(Function)) {
1697   case OMPRTL__kmpc_fork_call: {
1698     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1699     // microtask, ...);
1700     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1701                                 getKmpc_MicroPointerTy()};
1702     auto *FnTy =
1703         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1704     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1705     if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
1706       if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
1707         llvm::LLVMContext &Ctx = F->getContext();
1708         llvm::MDBuilder MDB(Ctx);
1709         // Annotate the callback behavior of the __kmpc_fork_call:
1710         //  - The callback callee is argument number 2 (microtask).
1711         //  - The first two arguments of the callback callee are unknown (-1).
1712         //  - All variadic arguments to the __kmpc_fork_call are passed to the
1713         //    callback callee.
1714         F->addMetadata(
1715             llvm::LLVMContext::MD_callback,
1716             *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
1717                                         2, {-1, -1},
1718                                         /* VarArgsArePassed */ true)}));
1719       }
1720     }
1721     break;
1722   }
1723   case OMPRTL__kmpc_global_thread_num: {
1724     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1725     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1726     auto *FnTy =
1727         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1728     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1729     break;
1730   }
1731   case OMPRTL__kmpc_threadprivate_cached: {
1732     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1733     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1734     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1735                                 CGM.VoidPtrTy, CGM.SizeTy,
1736                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1737     auto *FnTy =
1738         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1739     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1740     break;
1741   }
1742   case OMPRTL__kmpc_critical: {
1743     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1744     // kmp_critical_name *crit);
1745     llvm::Type *TypeParams[] = {
1746         getIdentTyPointerTy(), CGM.Int32Ty,
1747         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1748     auto *FnTy =
1749         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1750     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1751     break;
1752   }
1753   case OMPRTL__kmpc_critical_with_hint: {
1754     // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1755     // kmp_critical_name *crit, uintptr_t hint);
1756     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1757                                 llvm::PointerType::getUnqual(KmpCriticalNameTy),
1758                                 CGM.IntPtrTy};
1759     auto *FnTy =
1760         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1761     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1762     break;
1763   }
1764   case OMPRTL__kmpc_threadprivate_register: {
1765     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1766     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1767     // typedef void *(*kmpc_ctor)(void *);
1768     auto *KmpcCtorTy =
1769         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1770                                 /*isVarArg*/ false)->getPointerTo();
1771     // typedef void *(*kmpc_cctor)(void *, void *);
1772     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1773     auto *KmpcCopyCtorTy =
1774         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1775                                 /*isVarArg*/ false)
1776             ->getPointerTo();
1777     // typedef void (*kmpc_dtor)(void *);
1778     auto *KmpcDtorTy =
1779         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1780             ->getPointerTo();
1781     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1782                               KmpcCopyCtorTy, KmpcDtorTy};
1783     auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1784                                         /*isVarArg*/ false);
1785     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1786     break;
1787   }
1788   case OMPRTL__kmpc_end_critical: {
1789     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1790     // kmp_critical_name *crit);
1791     llvm::Type *TypeParams[] = {
1792         getIdentTyPointerTy(), CGM.Int32Ty,
1793         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1794     auto *FnTy =
1795         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1796     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1797     break;
1798   }
1799   case OMPRTL__kmpc_cancel_barrier: {
1800     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1801     // global_tid);
1802     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1803     auto *FnTy =
1804         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1805     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1806     break;
1807   }
1808   case OMPRTL__kmpc_barrier: {
1809     // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1810     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1811     auto *FnTy =
1812         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1813     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1814     break;
1815   }
1816   case OMPRTL__kmpc_for_static_fini: {
1817     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1818     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1819     auto *FnTy =
1820         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1821     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1822     break;
1823   }
1824   case OMPRTL__kmpc_push_num_threads: {
1825     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1826     // kmp_int32 num_threads)
1827     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1828                                 CGM.Int32Ty};
1829     auto *FnTy =
1830         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1831     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1832     break;
1833   }
1834   case OMPRTL__kmpc_serialized_parallel: {
1835     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1836     // global_tid);
1837     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1838     auto *FnTy =
1839         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1840     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1841     break;
1842   }
1843   case OMPRTL__kmpc_end_serialized_parallel: {
1844     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1845     // global_tid);
1846     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1847     auto *FnTy =
1848         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1849     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1850     break;
1851   }
1852   case OMPRTL__kmpc_flush: {
1853     // Build void __kmpc_flush(ident_t *loc);
1854     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1855     auto *FnTy =
1856         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1857     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1858     break;
1859   }
1860   case OMPRTL__kmpc_master: {
1861     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1862     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1863     auto *FnTy =
1864         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1865     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
1866     break;
1867   }
1868   case OMPRTL__kmpc_end_master: {
1869     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
1870     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1871     auto *FnTy =
1872         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1873     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
1874     break;
1875   }
1876   case OMPRTL__kmpc_omp_taskyield: {
1877     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
1878     // int end_part);
1879     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1880     auto *FnTy =
1881         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1882     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
1883     break;
1884   }
1885   case OMPRTL__kmpc_single: {
1886     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
1887     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1888     auto *FnTy =
1889         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1890     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
1891     break;
1892   }
1893   case OMPRTL__kmpc_end_single: {
1894     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
1895     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1896     auto *FnTy =
1897         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1898     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
1899     break;
1900   }
1901   case OMPRTL__kmpc_omp_task_alloc: {
1902     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
1903     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1904     // kmp_routine_entry_t *task_entry);
1905     assert(KmpRoutineEntryPtrTy != nullptr &&
1906            "Type kmp_routine_entry_t must be created.");
1907     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1908                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
1909     // Return void * and then cast to particular kmp_task_t type.
1910     auto *FnTy =
1911         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1912     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
1913     break;
1914   }
1915   case OMPRTL__kmpc_omp_task: {
1916     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1917     // *new_task);
1918     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1919                                 CGM.VoidPtrTy};
1920     auto *FnTy =
1921         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1922     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
1923     break;
1924   }
1925   case OMPRTL__kmpc_copyprivate: {
1926     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
1927     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
1928     // kmp_int32 didit);
1929     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1930     auto *CpyFnTy =
1931         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
1932     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
1933                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
1934                                 CGM.Int32Ty};
1935     auto *FnTy =
1936         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1937     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
1938     break;
1939   }
1940   case OMPRTL__kmpc_reduce: {
1941     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
1942     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
1943     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
1944     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1945     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1946                                                /*isVarArg=*/false);
1947     llvm::Type *TypeParams[] = {
1948         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1949         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1950         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1951     auto *FnTy =
1952         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1953     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
1954     break;
1955   }
1956   case OMPRTL__kmpc_reduce_nowait: {
1957     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
1958     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
1959     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
1960     // *lck);
1961     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1962     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1963                                                /*isVarArg=*/false);
1964     llvm::Type *TypeParams[] = {
1965         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1966         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1967         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1968     auto *FnTy =
1969         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1970     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
1971     break;
1972   }
1973   case OMPRTL__kmpc_end_reduce: {
1974     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
1975     // kmp_critical_name *lck);
1976     llvm::Type *TypeParams[] = {
1977         getIdentTyPointerTy(), CGM.Int32Ty,
1978         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1979     auto *FnTy =
1980         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1981     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
1982     break;
1983   }
1984   case OMPRTL__kmpc_end_reduce_nowait: {
1985     // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
1986     // kmp_critical_name *lck);
1987     llvm::Type *TypeParams[] = {
1988         getIdentTyPointerTy(), CGM.Int32Ty,
1989         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1990     auto *FnTy =
1991         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1992     RTLFn =
1993         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
1994     break;
1995   }
1996   case OMPRTL__kmpc_omp_task_begin_if0: {
1997     // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1998     // *new_task);
1999     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2000                                 CGM.VoidPtrTy};
2001     auto *FnTy =
2002         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2003     RTLFn =
2004         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
2005     break;
2006   }
2007   case OMPRTL__kmpc_omp_task_complete_if0: {
2008     // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2009     // *new_task);
2010     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2011                                 CGM.VoidPtrTy};
2012     auto *FnTy =
2013         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2014     RTLFn = CGM.CreateRuntimeFunction(FnTy,
2015                                       /*Name=*/"__kmpc_omp_task_complete_if0");
2016     break;
2017   }
2018   case OMPRTL__kmpc_ordered: {
2019     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
2020     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2021     auto *FnTy =
2022         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2023     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
2024     break;
2025   }
2026   case OMPRTL__kmpc_end_ordered: {
2027     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
2028     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2029     auto *FnTy =
2030         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2031     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
2032     break;
2033   }
2034   case OMPRTL__kmpc_omp_taskwait: {
2035     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
2036     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2037     auto *FnTy =
2038         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2039     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
2040     break;
2041   }
2042   case OMPRTL__kmpc_taskgroup: {
2043     // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
2044     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2045     auto *FnTy =
2046         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2047     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
2048     break;
2049   }
2050   case OMPRTL__kmpc_end_taskgroup: {
2051     // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
2052     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2053     auto *FnTy =
2054         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2055     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
2056     break;
2057   }
2058   case OMPRTL__kmpc_push_proc_bind: {
2059     // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
2060     // int proc_bind)
2061     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2062     auto *FnTy =
2063         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2064     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
2065     break;
2066   }
2067   case OMPRTL__kmpc_omp_task_with_deps: {
2068     // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
2069     // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
2070     // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
2071     llvm::Type *TypeParams[] = {
2072         getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
2073         CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
2074     auto *FnTy =
2075         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2076     RTLFn =
2077         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
2078     break;
2079   }
2080   case OMPRTL__kmpc_omp_wait_deps: {
2081     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
2082     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
2083     // kmp_depend_info_t *noalias_dep_list);
2084     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2085                                 CGM.Int32Ty,           CGM.VoidPtrTy,
2086                                 CGM.Int32Ty,           CGM.VoidPtrTy};
2087     auto *FnTy =
2088         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2089     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
2090     break;
2091   }
2092   case OMPRTL__kmpc_cancellationpoint: {
2093     // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
2094     // global_tid, kmp_int32 cncl_kind)
2095     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2096     auto *FnTy =
2097         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2098     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
2099     break;
2100   }
2101   case OMPRTL__kmpc_cancel: {
2102     // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
2103     // kmp_int32 cncl_kind)
2104     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2105     auto *FnTy =
2106         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2107     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
2108     break;
2109   }
2110   case OMPRTL__kmpc_push_num_teams: {
2111     // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid,
2112     // kmp_int32 num_teams, kmp_int32 num_threads)
2113     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2114         CGM.Int32Ty};
2115     auto *FnTy =
2116         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2117     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
2118     break;
2119   }
2120   case OMPRTL__kmpc_fork_teams: {
2121     // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
2122     // microtask, ...);
2123     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2124                                 getKmpc_MicroPointerTy()};
2125     auto *FnTy =
2126         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
2127     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
2128     if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
2129       if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
2130         llvm::LLVMContext &Ctx = F->getContext();
2131         llvm::MDBuilder MDB(Ctx);
2132         // Annotate the callback behavior of the __kmpc_fork_teams:
2133         //  - The callback callee is argument number 2 (microtask).
2134         //  - The first two arguments of the callback callee are unknown (-1).
2135         //  - All variadic arguments to the __kmpc_fork_teams are passed to the
2136         //    callback callee.
2137         F->addMetadata(
2138             llvm::LLVMContext::MD_callback,
2139             *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
2140                                         2, {-1, -1},
2141                                         /* VarArgsArePassed */ true)}));
2142       }
2143     }
2144     break;
2145   }
2146   case OMPRTL__kmpc_taskloop: {
2147     // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
2148     // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
2149     // sched, kmp_uint64 grainsize, void *task_dup);
2150     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2151                                 CGM.IntTy,
2152                                 CGM.VoidPtrTy,
2153                                 CGM.IntTy,
2154                                 CGM.Int64Ty->getPointerTo(),
2155                                 CGM.Int64Ty->getPointerTo(),
2156                                 CGM.Int64Ty,
2157                                 CGM.IntTy,
2158                                 CGM.IntTy,
2159                                 CGM.Int64Ty,
2160                                 CGM.VoidPtrTy};
2161     auto *FnTy =
2162         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2163     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
2164     break;
2165   }
2166   case OMPRTL__kmpc_doacross_init: {
2167     // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
2168     // num_dims, struct kmp_dim *dims);
2169     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2170                                 CGM.Int32Ty,
2171                                 CGM.Int32Ty,
2172                                 CGM.VoidPtrTy};
2173     auto *FnTy =
2174         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2175     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
2176     break;
2177   }
2178   case OMPRTL__kmpc_doacross_fini: {
2179     // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
2180     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2181     auto *FnTy =
2182         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2183     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
2184     break;
2185   }
2186   case OMPRTL__kmpc_doacross_post: {
2187     // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
2188     // *vec);
2189     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2190                                 CGM.Int64Ty->getPointerTo()};
2191     auto *FnTy =
2192         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2193     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
2194     break;
2195   }
2196   case OMPRTL__kmpc_doacross_wait: {
2197     // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
2198     // *vec);
2199     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2200                                 CGM.Int64Ty->getPointerTo()};
2201     auto *FnTy =
2202         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2203     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
2204     break;
2205   }
2206   case OMPRTL__kmpc_task_reduction_init: {
2207     // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
2208     // *data);
2209     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
2210     auto *FnTy =
2211         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2212     RTLFn =
2213         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
2214     break;
2215   }
2216   case OMPRTL__kmpc_task_reduction_get_th_data: {
2217     // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
2218     // *d);
2219     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2220     auto *FnTy =
2221         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2222     RTLFn = CGM.CreateRuntimeFunction(
2223         FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
2224     break;
2225   }
2226   case OMPRTL__kmpc_alloc: {
2227     // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t
2228     // al); omp_allocator_handle_t type is void *.
2229     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy};
2230     auto *FnTy =
2231         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2232     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc");
2233     break;
2234   }
2235   case OMPRTL__kmpc_free: {
2236     // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t
2237     // al); omp_allocator_handle_t type is void *.
2238     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2239     auto *FnTy =
2240         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2241     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free");
2242     break;
2243   }
2244   case OMPRTL__kmpc_push_target_tripcount: {
2245     // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
2246     // size);
2247     llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty};
2248     llvm::FunctionType *FnTy =
2249         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2250     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount");
2251     break;
2252   }
2253   case OMPRTL__tgt_target: {
2254     // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
2255     // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2256     // *arg_types);
2257     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2258                                 CGM.VoidPtrTy,
2259                                 CGM.Int32Ty,
2260                                 CGM.VoidPtrPtrTy,
2261                                 CGM.VoidPtrPtrTy,
2262                                 CGM.SizeTy->getPointerTo(),
2263                                 CGM.Int64Ty->getPointerTo()};
2264     auto *FnTy =
2265         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2266     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
2267     break;
2268   }
2269   case OMPRTL__tgt_target_nowait: {
2270     // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
2271     // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
2272     // int64_t *arg_types);
2273     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2274                                 CGM.VoidPtrTy,
2275                                 CGM.Int32Ty,
2276                                 CGM.VoidPtrPtrTy,
2277                                 CGM.VoidPtrPtrTy,
2278                                 CGM.SizeTy->getPointerTo(),
2279                                 CGM.Int64Ty->getPointerTo()};
2280     auto *FnTy =
2281         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2282     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
2283     break;
2284   }
2285   case OMPRTL__tgt_target_teams: {
2286     // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
2287     // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
2288     // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2289     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2290                                 CGM.VoidPtrTy,
2291                                 CGM.Int32Ty,
2292                                 CGM.VoidPtrPtrTy,
2293                                 CGM.VoidPtrPtrTy,
2294                                 CGM.SizeTy->getPointerTo(),
2295                                 CGM.Int64Ty->getPointerTo(),
2296                                 CGM.Int32Ty,
2297                                 CGM.Int32Ty};
2298     auto *FnTy =
2299         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2300     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
2301     break;
2302   }
2303   case OMPRTL__tgt_target_teams_nowait: {
2304     // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
2305     // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t
2306     // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2307     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2308                                 CGM.VoidPtrTy,
2309                                 CGM.Int32Ty,
2310                                 CGM.VoidPtrPtrTy,
2311                                 CGM.VoidPtrPtrTy,
2312                                 CGM.SizeTy->getPointerTo(),
2313                                 CGM.Int64Ty->getPointerTo(),
2314                                 CGM.Int32Ty,
2315                                 CGM.Int32Ty};
2316     auto *FnTy =
2317         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2318     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
2319     break;
2320   }
2321   case OMPRTL__tgt_register_requires: {
2322     // Build void __tgt_register_requires(int64_t flags);
2323     llvm::Type *TypeParams[] = {CGM.Int64Ty};
2324     auto *FnTy =
2325         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2326     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires");
2327     break;
2328   }
2329   case OMPRTL__tgt_register_lib: {
2330     // Build void __tgt_register_lib(__tgt_bin_desc *desc);
2331     QualType ParamTy =
2332         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2333     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2334     auto *FnTy =
2335         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2336     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
2337     break;
2338   }
2339   case OMPRTL__tgt_unregister_lib: {
2340     // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
2341     QualType ParamTy =
2342         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2343     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2344     auto *FnTy =
2345         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2346     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
2347     break;
2348   }
2349   case OMPRTL__tgt_target_data_begin: {
2350     // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
2351     // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2352     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2353                                 CGM.Int32Ty,
2354                                 CGM.VoidPtrPtrTy,
2355                                 CGM.VoidPtrPtrTy,
2356                                 CGM.SizeTy->getPointerTo(),
2357                                 CGM.Int64Ty->getPointerTo()};
2358     auto *FnTy =
2359         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2360     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
2361     break;
2362   }
2363   case OMPRTL__tgt_target_data_begin_nowait: {
2364     // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
2365     // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2366     // *arg_types);
2367     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2368                                 CGM.Int32Ty,
2369                                 CGM.VoidPtrPtrTy,
2370                                 CGM.VoidPtrPtrTy,
2371                                 CGM.SizeTy->getPointerTo(),
2372                                 CGM.Int64Ty->getPointerTo()};
2373     auto *FnTy =
2374         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2375     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
2376     break;
2377   }
2378   case OMPRTL__tgt_target_data_end: {
2379     // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
2380     // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2381     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2382                                 CGM.Int32Ty,
2383                                 CGM.VoidPtrPtrTy,
2384                                 CGM.VoidPtrPtrTy,
2385                                 CGM.SizeTy->getPointerTo(),
2386                                 CGM.Int64Ty->getPointerTo()};
2387     auto *FnTy =
2388         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2389     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
2390     break;
2391   }
2392   case OMPRTL__tgt_target_data_end_nowait: {
2393     // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
2394     // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2395     // *arg_types);
2396     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2397                                 CGM.Int32Ty,
2398                                 CGM.VoidPtrPtrTy,
2399                                 CGM.VoidPtrPtrTy,
2400                                 CGM.SizeTy->getPointerTo(),
2401                                 CGM.Int64Ty->getPointerTo()};
2402     auto *FnTy =
2403         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2404     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
2405     break;
2406   }
2407   case OMPRTL__tgt_target_data_update: {
2408     // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
2409     // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2410     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2411                                 CGM.Int32Ty,
2412                                 CGM.VoidPtrPtrTy,
2413                                 CGM.VoidPtrPtrTy,
2414                                 CGM.SizeTy->getPointerTo(),
2415                                 CGM.Int64Ty->getPointerTo()};
2416     auto *FnTy =
2417         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2418     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
2419     break;
2420   }
2421   case OMPRTL__tgt_target_data_update_nowait: {
2422     // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
2423     // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2424     // *arg_types);
2425     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2426                                 CGM.Int32Ty,
2427                                 CGM.VoidPtrPtrTy,
2428                                 CGM.VoidPtrPtrTy,
2429                                 CGM.SizeTy->getPointerTo(),
2430                                 CGM.Int64Ty->getPointerTo()};
2431     auto *FnTy =
2432         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2433     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
2434     break;
2435   }
2436   }
2437   assert(RTLFn && "Unable to find OpenMP runtime function");
2438   return RTLFn;
2439 }
2440 
2441 llvm::FunctionCallee
2442 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
2443   assert((IVSize == 32 || IVSize == 64) &&
2444          "IV size is not compatible with the omp runtime");
2445   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
2446                                             : "__kmpc_for_static_init_4u")
2447                                 : (IVSigned ? "__kmpc_for_static_init_8"
2448                                             : "__kmpc_for_static_init_8u");
2449   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2450   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2451   llvm::Type *TypeParams[] = {
2452     getIdentTyPointerTy(),                     // loc
2453     CGM.Int32Ty,                               // tid
2454     CGM.Int32Ty,                               // schedtype
2455     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2456     PtrTy,                                     // p_lower
2457     PtrTy,                                     // p_upper
2458     PtrTy,                                     // p_stride
2459     ITy,                                       // incr
2460     ITy                                        // chunk
2461   };
2462   auto *FnTy =
2463       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2464   return CGM.CreateRuntimeFunction(FnTy, Name);
2465 }
2466 
2467 llvm::FunctionCallee
2468 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
2469   assert((IVSize == 32 || IVSize == 64) &&
2470          "IV size is not compatible with the omp runtime");
2471   StringRef Name =
2472       IVSize == 32
2473           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
2474           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
2475   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2476   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
2477                                CGM.Int32Ty,           // tid
2478                                CGM.Int32Ty,           // schedtype
2479                                ITy,                   // lower
2480                                ITy,                   // upper
2481                                ITy,                   // stride
2482                                ITy                    // chunk
2483   };
2484   auto *FnTy =
2485       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2486   return CGM.CreateRuntimeFunction(FnTy, Name);
2487 }
2488 
2489 llvm::FunctionCallee
2490 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
2491   assert((IVSize == 32 || IVSize == 64) &&
2492          "IV size is not compatible with the omp runtime");
2493   StringRef Name =
2494       IVSize == 32
2495           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
2496           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
2497   llvm::Type *TypeParams[] = {
2498       getIdentTyPointerTy(), // loc
2499       CGM.Int32Ty,           // tid
2500   };
2501   auto *FnTy =
2502       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2503   return CGM.CreateRuntimeFunction(FnTy, Name);
2504 }
2505 
2506 llvm::FunctionCallee
2507 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
2508   assert((IVSize == 32 || IVSize == 64) &&
2509          "IV size is not compatible with the omp runtime");
2510   StringRef Name =
2511       IVSize == 32
2512           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
2513           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
2514   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2515   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2516   llvm::Type *TypeParams[] = {
2517     getIdentTyPointerTy(),                     // loc
2518     CGM.Int32Ty,                               // tid
2519     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2520     PtrTy,                                     // p_lower
2521     PtrTy,                                     // p_upper
2522     PtrTy                                      // p_stride
2523   };
2524   auto *FnTy =
2525       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2526   return CGM.CreateRuntimeFunction(FnTy, Name);
2527 }
2528 
Address CGOpenMPRuntime::getAddrOfDeclareTargetLink(const VarDecl *VD) {
  // In simd-only OpenMP mode no device offloading happens, so no link pointer
  // is materialized.
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && *Res == OMPDeclareTargetDeclAttr::MT_Link) {
    // Build the name "<mangled-name>_decl_tgt_link_ptr" for the pointer
    // variable that refers to the declare-target-link global.
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD)) << "_decl_tgt_link_ptr";
    }
    // Lazily create the pointer variable the first time it is requested; on
    // subsequent calls reuse the one already in the module.
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);
      // On the host, make the pointer externally visible and point it at the
      // original variable; the device side only declares it.
      if (!CGM.getLangOpts().OpenMPIsDevice) {
        auto *GV = cast<llvm::GlobalVariable>(Ptr);
        GV->setLinkage(llvm::GlobalValue::ExternalLinkage);
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      }
      // Keep the pointer from being optimized away and register it in the
      // offloading entries table.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ptr));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    // The pointer is addressed with the alignment of the underlying variable.
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  // Not a declare-target-link variable: nothing to return.
  return Address::invalid();
}
2557 
2558 llvm::Constant *
2559 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
2560   assert(!CGM.getLangOpts().OpenMPUseTLS ||
2561          !CGM.getContext().getTargetInfo().isTLSSupported());
2562   // Lookup the entry, lazily creating it if necessary.
2563   std::string Suffix = getName({"cache", ""});
2564   return getOrCreateInternalVariable(
2565       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
2566 }
2567 
2568 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
2569                                                 const VarDecl *VD,
2570                                                 Address VDAddr,
2571                                                 SourceLocation Loc) {
2572   if (CGM.getLangOpts().OpenMPUseTLS &&
2573       CGM.getContext().getTargetInfo().isTLSSupported())
2574     return VDAddr;
2575 
2576   llvm::Type *VarTy = VDAddr.getElementType();
2577   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2578                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2579                                                        CGM.Int8PtrTy),
2580                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
2581                          getOrCreateThreadPrivateCache(VD)};
2582   return Address(CGF.EmitRuntimeCall(
2583       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2584                  VDAddr.getAlignment());
2585 }
2586 
2587 void CGOpenMPRuntime::emitThreadPrivateVarInit(
2588     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
2589     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
2590   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
2591   // library.
2592   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
2593   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
2594                       OMPLoc);
2595   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
2596   // to register constructor/destructor for variable.
2597   llvm::Value *Args[] = {
2598       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
2599       Ctor, CopyCtor, Dtor};
2600   CGF.EmitRuntimeCall(
2601       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
2602 }
2603 
2604 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
2605     const VarDecl *VD, Address VDAddr, SourceLocation Loc,
2606     bool PerformInit, CodeGenFunction *CGF) {
2607   if (CGM.getLangOpts().OpenMPUseTLS &&
2608       CGM.getContext().getTargetInfo().isTLSSupported())
2609     return nullptr;
2610 
2611   VD = VD->getDefinition(CGM.getContext());
2612   if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
2613     QualType ASTTy = VD->getType();
2614 
2615     llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
2616     const Expr *Init = VD->getAnyInitializer();
2617     if (CGM.getLangOpts().CPlusPlus && PerformInit) {
2618       // Generate function that re-emits the declaration's initializer into the
2619       // threadprivate copy of the variable VD
2620       CodeGenFunction CtorCGF(CGM);
2621       FunctionArgList Args;
2622       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
2623                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
2624                             ImplicitParamDecl::Other);
2625       Args.push_back(&Dst);
2626 
2627       const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
2628           CGM.getContext().VoidPtrTy, Args);
2629       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2630       std::string Name = getName({"__kmpc_global_ctor_", ""});
2631       llvm::Function *Fn =
2632           CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
2633       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
2634                             Args, Loc, Loc);
2635       llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
2636           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
2637           CGM.getContext().VoidPtrTy, Dst.getLocation());
2638       Address Arg = Address(ArgVal, VDAddr.getAlignment());
2639       Arg = CtorCGF.Builder.CreateElementBitCast(
2640           Arg, CtorCGF.ConvertTypeForMem(ASTTy));
2641       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
2642                                /*IsInitializer=*/true);
2643       ArgVal = CtorCGF.EmitLoadOfScalar(
2644           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
2645           CGM.getContext().VoidPtrTy, Dst.getLocation());
2646       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
2647       CtorCGF.FinishFunction();
2648       Ctor = Fn;
2649     }
2650     if (VD->getType().isDestructedType() != QualType::DK_none) {
2651       // Generate function that emits destructor call for the threadprivate copy
2652       // of the variable VD
2653       CodeGenFunction DtorCGF(CGM);
2654       FunctionArgList Args;
2655       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
2656                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
2657                             ImplicitParamDecl::Other);
2658       Args.push_back(&Dst);
2659 
2660       const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
2661           CGM.getContext().VoidTy, Args);
2662       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2663       std::string Name = getName({"__kmpc_global_dtor_", ""});
2664       llvm::Function *Fn =
2665           CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
2666       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
2667       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
2668                             Loc, Loc);
2669       // Create a scope with an artificial location for the body of this function.
2670       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
2671       llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
2672           DtorCGF.GetAddrOfLocalVar(&Dst),
2673           /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
2674       DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
2675                           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
2676                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
2677       DtorCGF.FinishFunction();
2678       Dtor = Fn;
2679     }
2680     // Do not emit init function if it is not required.
2681     if (!Ctor && !Dtor)
2682       return nullptr;
2683 
2684     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2685     auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
2686                                                /*isVarArg=*/false)
2687                            ->getPointerTo();
2688     // Copying constructor for the threadprivate variable.
2689     // Must be NULL - reserved by runtime, but currently it requires that this
2690     // parameter is always NULL. Otherwise it fires assertion.
2691     CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
2692     if (Ctor == nullptr) {
2693       auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
2694                                              /*isVarArg=*/false)
2695                          ->getPointerTo();
2696       Ctor = llvm::Constant::getNullValue(CtorTy);
2697     }
2698     if (Dtor == nullptr) {
2699       auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
2700                                              /*isVarArg=*/false)
2701                          ->getPointerTo();
2702       Dtor = llvm::Constant::getNullValue(DtorTy);
2703     }
2704     if (!CGF) {
2705       auto *InitFunctionTy =
2706           llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
2707       std::string Name = getName({"__omp_threadprivate_init_", ""});
2708       llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
2709           InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
2710       CodeGenFunction InitCGF(CGM);
2711       FunctionArgList ArgList;
2712       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
2713                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
2714                             Loc, Loc);
2715       emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
2716       InitCGF.FinishFunction();
2717       return InitFunction;
2718     }
2719     emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
2720   }
2721   return nullptr;
2722 }
2723 
2724 /// Obtain information that uniquely identifies a target entry. This
2725 /// consists of the file and device IDs as well as line number associated with
2726 /// the relevant entry source location.
2727 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
2728                                      unsigned &DeviceID, unsigned &FileID,
2729                                      unsigned &LineNum) {
2730   SourceManager &SM = C.getSourceManager();
2731 
2732   // The loc should be always valid and have a file ID (the user cannot use
2733   // #pragma directives in macros)
2734 
2735   assert(Loc.isValid() && "Source location is expected to be always valid.");
2736 
2737   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
2738   assert(PLoc.isValid() && "Source location is expected to be always valid.");
2739 
2740   llvm::sys::fs::UniqueID ID;
2741   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
2742     SM.getDiagnostics().Report(diag::err_cannot_open_file)
2743         << PLoc.getFilename() << EC.message();
2744 
2745   DeviceID = ID.getDevice();
2746   FileID = ID.getFile();
2747   LineNum = PLoc.getLine();
2748 }
2749 
/// Emits ctor/dtor offload entries for a 'declare target' variable
/// definition. Every return path yields LangOpts.OpenMPIsDevice, i.e. false
/// on the host and true on the device.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // Only variables that are declare-target mapped (and not 'link') get
  // ctor/dtor entries here.
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link)
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  // Process each definition only once; DeclareTargetWithDefinition records
  // the mangled names already handled.
  if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();

  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      // Suppress debug locations for the prologue, then give the body an
      // artificial location.
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the generated ctor alive even though nothing in the module
      // references it directly.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // Host side: no ctor body is needed, only a private dummy byte whose
      // address serves as the entry's unique ID.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the generated dtor alive; it is only referenced via the entry.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host side: emit only the identifying dummy byte for the dtor entry.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2857 
2858 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
2859                                                           QualType VarType,
2860                                                           StringRef Name) {
2861   std::string Suffix = getName({"artificial", ""});
2862   std::string CacheSuffix = getName({"cache", ""});
2863   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2864   llvm::Value *GAddr =
2865       getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
2866   llvm::Value *Args[] = {
2867       emitUpdateLocation(CGF, SourceLocation()),
2868       getThreadID(CGF, SourceLocation()),
2869       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2870       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2871                                 /*IsSigned=*/false),
2872       getOrCreateInternalVariable(
2873           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
2874   return Address(
2875       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2876           CGF.EmitRuntimeCall(
2877               createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2878           VarLVType->getPointerTo(/*AddrSpace=*/0)),
2879       CGM.getPointerAlign());
2880 }
2881 
2882 void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
2883                                       const RegionCodeGenTy &ThenGen,
2884                                       const RegionCodeGenTy &ElseGen) {
2885   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2886 
2887   // If the condition constant folds and can be elided, try to avoid emitting
2888   // the condition and the dead arm of the if/else.
2889   bool CondConstant;
2890   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2891     if (CondConstant)
2892       ThenGen(CGF);
2893     else
2894       ElseGen(CGF);
2895     return;
2896   }
2897 
2898   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2899   // emit the conditional branch.
2900   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2901   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2902   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2903   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2904 
2905   // Emit the 'then' code.
2906   CGF.EmitBlock(ThenBlock);
2907   ThenGen(CGF);
2908   CGF.EmitBranch(ContBlock);
2909   // Emit the 'else' code if present.
2910   // There is no need to emit line number for unconditional branch.
2911   (void)ApplyDebugLocation::CreateEmpty(CGF);
2912   CGF.EmitBlock(ElseBlock);
2913   ElseGen(CGF);
2914   // There is no need to emit line number for unconditional branch.
2915   (void)ApplyDebugLocation::CreateEmpty(CGF);
2916   CGF.EmitBranch(ContBlock);
2917   // Emit the continuation block for code after the if.
2918   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2919 }
2920 
/// Emits the call sequence for a 'parallel' region: a fork through
/// __kmpc_fork_call, or — when the if-clause is false — a serialized
/// execution of the outlined function on the current thread.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  // ThenGen: the "real parallel" path.
  auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    // The captured variables are appended as trailing variadic arguments.
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // ElseGen: the serialized path — run the outlined function inline between
  // __kmpc_serialized_parallel / __kmpc_end_serialized_parallel.
  auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
                                                          PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);

    // OutlinedFn(&GTid, &zero, CapturedStruct);
    Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                                        /*Name*/ ".zero.addr");
    CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ZeroAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddr.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
        EndArgs);
  };
  if (IfCond) {
    // With an if-clause, select between the two paths (the branch may fold
    // away at compile time).
    emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    // No if-clause: always take the fork path.
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2978 
2979 // If we're inside an (outlined) parallel region, use the region info's
2980 // thread-ID variable (it is passed in a first argument of the outlined function
2981 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2982 // regular serial code region, get thread ID by calling kmp_int32
2983 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2984 // return the address of that temp.
2985 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2986                                              SourceLocation Loc) {
2987   if (auto *OMPRegionInfo =
2988           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2989     if (OMPRegionInfo->getThreadIDVariable())
2990       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
2991 
2992   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2993   QualType Int32Ty =
2994       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2995   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2996   CGF.EmitStoreOfScalar(ThreadID,
2997                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2998 
2999   return ThreadIDTemp;
3000 }
3001 
3002 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
3003     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
3004   SmallString<256> Buffer;
3005   llvm::raw_svector_ostream Out(Buffer);
3006   Out << Name;
3007   StringRef RuntimeName = Out.str();
3008   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
3009   if (Elem.second) {
3010     assert(Elem.second->getType()->getPointerElementType() == Ty &&
3011            "OMP internal variable has different type than requested");
3012     return &*Elem.second;
3013   }
3014 
3015   return Elem.second = new llvm::GlobalVariable(
3016              CGM.getModule(), Ty, /*IsConstant*/ false,
3017              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
3018              Elem.first(), /*InsertBefore=*/nullptr,
3019              llvm::GlobalValue::NotThreadLocal, AddressSpace);
3020 }
3021 
3022 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
3023   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
3024   std::string Name = getName({Prefix, "var"});
3025   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
3026 }
3027 
3028 namespace {
3029 /// Common pre(post)-action for different OpenMP constructs.
3030 class CommonActionTy final : public PrePostActionTy {
3031   llvm::FunctionCallee EnterCallee;
3032   ArrayRef<llvm::Value *> EnterArgs;
3033   llvm::FunctionCallee ExitCallee;
3034   ArrayRef<llvm::Value *> ExitArgs;
3035   bool Conditional;
3036   llvm::BasicBlock *ContBlock = nullptr;
3037 
3038 public:
3039   CommonActionTy(llvm::FunctionCallee EnterCallee,
3040                  ArrayRef<llvm::Value *> EnterArgs,
3041                  llvm::FunctionCallee ExitCallee,
3042                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
3043       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
3044         ExitArgs(ExitArgs), Conditional(Conditional) {}
3045   void Enter(CodeGenFunction &CGF) override {
3046     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
3047     if (Conditional) {
3048       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
3049       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
3050       ContBlock = CGF.createBasicBlock("omp_if.end");
3051       // Generate the branch (If-stmt)
3052       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
3053       CGF.EmitBlock(ThenBlock);
3054     }
3055   }
3056   void Done(CodeGenFunction &CGF) {
3057     // Emit the rest of blocks/branches
3058     CGF.EmitBranch(ContBlock);
3059     CGF.EmitBlock(ContBlock, true);
3060   }
3061   void Exit(CodeGenFunction &CGF) override {
3062     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
3063   }
3064 };
3065 } // anonymous namespace
3066 
3067 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
3068                                          StringRef CriticalName,
3069                                          const RegionCodeGenTy &CriticalOpGen,
3070                                          SourceLocation Loc, const Expr *Hint) {
3071   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
3072   // CriticalOpGen();
3073   // __kmpc_end_critical(ident_t *, gtid, Lock);
3074   // Prepare arguments and build a call to __kmpc_critical
3075   if (!CGF.HaveInsertPoint())
3076     return;
3077   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3078                          getCriticalRegionLock(CriticalName)};
3079   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
3080                                                 std::end(Args));
3081   if (Hint) {
3082     EnterArgs.push_back(CGF.Builder.CreateIntCast(
3083         CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
3084   }
3085   CommonActionTy Action(
3086       createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
3087                                  : OMPRTL__kmpc_critical),
3088       EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
3089   CriticalOpGen.setAction(Action);
3090   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
3091 }
3092 
3093 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
3094                                        const RegionCodeGenTy &MasterOpGen,
3095                                        SourceLocation Loc) {
3096   if (!CGF.HaveInsertPoint())
3097     return;
3098   // if(__kmpc_master(ident_t *, gtid)) {
3099   //   MasterOpGen();
3100   //   __kmpc_end_master(ident_t *, gtid);
3101   // }
3102   // Prepare arguments and build a call to __kmpc_master
3103   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3104   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
3105                         createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
3106                         /*Conditional=*/true);
3107   MasterOpGen.setAction(Action);
3108   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
3109   Action.Done(CGF);
3110 }
3111 
3112 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
3113                                         SourceLocation Loc) {
3114   if (!CGF.HaveInsertPoint())
3115     return;
3116   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
3117   llvm::Value *Args[] = {
3118       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3119       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
3120   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
3121   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3122     Region->emitUntiedSwitch(CGF);
3123 }
3124 
3125 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
3126                                           const RegionCodeGenTy &TaskgroupOpGen,
3127                                           SourceLocation Loc) {
3128   if (!CGF.HaveInsertPoint())
3129     return;
3130   // __kmpc_taskgroup(ident_t *, gtid);
3131   // TaskgroupOpGen();
3132   // __kmpc_end_taskgroup(ident_t *, gtid);
3133   // Prepare arguments and build a call to __kmpc_taskgroup
3134   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3135   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
3136                         createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
3137                         Args);
3138   TaskgroupOpGen.setAction(Action);
3139   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
3140 }
3141 
3142 /// Given an array of pointers to variables, project the address of a
3143 /// given variable.
3144 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
3145                                       unsigned Index, const VarDecl *Var) {
3146   // Pull out the pointer to the variable.
3147   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
3148   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
3149 
3150   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
3151   Addr = CGF.Builder.CreateElementBitCast(
3152       Addr, CGF.ConvertTypeForMem(Var->getType()));
3153   return Addr;
3154 }
3155 
/// Builds the internal 'void copy_func(void *LHSArg, void *RHSArg)' used by
/// __kmpc_copyprivate. Both arguments point to arrays of void*, one slot per
/// copyprivate variable; slot I of LHS is assigned from slot I of RHS via
/// AssignmentOps[I].
/// NOTE(review): the call site in this file forwards its SrcExprs into the
/// DestExprs parameter and its DstExprs into SrcExprs — the parameter naming
/// looks swapped relative to the caller; confirm before relying on the names.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Reinterpret both void* parameters as pointers to the void*[n] arrays.
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    // Project the I-th destination and source addresses out of the arrays.
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    // Emit the copy using the directive's assignment expression.
    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
3209 
/// Emits a 'single' region, including the copyprivate broadcast of the
/// executing thread's values to all other threads when copyprivate variables
/// are present.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  // All four copyprivate lists are parallel arrays of the same length.
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // did_it records whether this thread executed the single region; it is only
  // needed when there are copyprivate variables to broadcast.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  // Close the conditional region opened by the __kmpc_single check.
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy =
        C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                               /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    // Fill the list with the void*-cast addresses of the variables.
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
  }
}
3290 
3291 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
3292                                         const RegionCodeGenTy &OrderedOpGen,
3293                                         SourceLocation Loc, bool IsThreads) {
3294   if (!CGF.HaveInsertPoint())
3295     return;
3296   // __kmpc_ordered(ident_t *, gtid);
3297   // OrderedOpGen();
3298   // __kmpc_end_ordered(ident_t *, gtid);
3299   // Prepare arguments and build a call to __kmpc_ordered
3300   if (IsThreads) {
3301     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3302     CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
3303                           createRuntimeFunction(OMPRTL__kmpc_end_ordered),
3304                           Args);
3305     OrderedOpGen.setAction(Action);
3306     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3307     return;
3308   }
3309   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3310 }
3311 
3312 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
3313   unsigned Flags;
3314   if (Kind == OMPD_for)
3315     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
3316   else if (Kind == OMPD_sections)
3317     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
3318   else if (Kind == OMPD_single)
3319     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
3320   else if (Kind == OMPD_barrier)
3321     Flags = OMP_IDENT_BARRIER_EXPL;
3322   else
3323     Flags = OMP_IDENT_BARRIER_IMPL;
3324   return Flags;
3325 }
3326 
3327 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
3328     CodeGenFunction &CGF, const OMPLoopDirective &S,
3329     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
3330   // Check if the loop directive is actually a doacross loop directive. In this
3331   // case choose static, 1 schedule.
3332   if (llvm::any_of(
3333           S.getClausesOfKind<OMPOrderedClause>(),
3334           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
3335     ScheduleKind = OMPC_SCHEDULE_static;
3336     // Chunk size is 1 in this case.
3337     llvm::APInt ChunkSize(32, 1);
3338     ChunkExpr = IntegerLiteral::Create(
3339         CGF.getContext(), ChunkSize,
3340         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
3341         SourceLocation());
3342   }
3343 }
3344 
/// Emit a barrier at the given location.
/// \param Kind Directive the barrier belongs to; selects the ident_t flags.
/// \param EmitChecks If true, emit the cancellation-exit branch after a
///        cancellable barrier.
/// \param ForceSimpleCall If true, always emit plain __kmpc_barrier even
///        inside a cancellable region.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  if (!CGF.HaveInsertPoint())
    return;
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  // Inside a region that supports cancellation the barrier must be the
  // cancellable variant so a pending cancel is observed at the barrier.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
      if (EmitChecks) {
        // __kmpc_cancel_barrier returns nonzero when cancellation was
        // requested; in that case branch out of the construct:
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
}
3382 
3383 /// Map the OpenMP loop schedule to the runtime enumeration.
3384 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
3385                                           bool Chunked, bool Ordered) {
3386   switch (ScheduleKind) {
3387   case OMPC_SCHEDULE_static:
3388     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
3389                    : (Ordered ? OMP_ord_static : OMP_sch_static);
3390   case OMPC_SCHEDULE_dynamic:
3391     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
3392   case OMPC_SCHEDULE_guided:
3393     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
3394   case OMPC_SCHEDULE_runtime:
3395     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
3396   case OMPC_SCHEDULE_auto:
3397     return Ordered ? OMP_ord_auto : OMP_sch_auto;
3398   case OMPC_SCHEDULE_unknown:
3399     assert(!Chunked && "chunk was specified but schedule kind not known");
3400     return Ordered ? OMP_ord_static : OMP_sch_static;
3401   }
3402   llvm_unreachable("Unexpected runtime schedule");
3403 }
3404 
3405 /// Map the OpenMP distribute schedule to the runtime enumeration.
3406 static OpenMPSchedType
3407 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
3408   // only static is allowed for dist_schedule
3409   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
3410 }
3411 
3412 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
3413                                          bool Chunked) const {
3414   OpenMPSchedType Schedule =
3415       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3416   return Schedule == OMP_sch_static;
3417 }
3418 
3419 bool CGOpenMPRuntime::isStaticNonchunked(
3420     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3421   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3422   return Schedule == OMP_dist_sch_static;
3423 }
3424 
3425 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
3426                                       bool Chunked) const {
3427   OpenMPSchedType Schedule =
3428       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3429   return Schedule == OMP_sch_static_chunked;
3430 }
3431 
3432 bool CGOpenMPRuntime::isStaticChunked(
3433     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3434   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3435   return Schedule == OMP_dist_sch_static_chunked;
3436 }
3437 
3438 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
3439   OpenMPSchedType Schedule =
3440       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
3441   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
3442   return Schedule != OMP_sch_static;
3443 }
3444 
3445 static int addMonoNonMonoModifier(OpenMPSchedType Schedule,
3446                                   OpenMPScheduleClauseModifier M1,
3447                                   OpenMPScheduleClauseModifier M2) {
3448   int Modifier = 0;
3449   switch (M1) {
3450   case OMPC_SCHEDULE_MODIFIER_monotonic:
3451     Modifier = OMP_sch_modifier_monotonic;
3452     break;
3453   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3454     Modifier = OMP_sch_modifier_nonmonotonic;
3455     break;
3456   case OMPC_SCHEDULE_MODIFIER_simd:
3457     if (Schedule == OMP_sch_static_chunked)
3458       Schedule = OMP_sch_static_balanced_chunked;
3459     break;
3460   case OMPC_SCHEDULE_MODIFIER_last:
3461   case OMPC_SCHEDULE_MODIFIER_unknown:
3462     break;
3463   }
3464   switch (M2) {
3465   case OMPC_SCHEDULE_MODIFIER_monotonic:
3466     Modifier = OMP_sch_modifier_monotonic;
3467     break;
3468   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3469     Modifier = OMP_sch_modifier_nonmonotonic;
3470     break;
3471   case OMPC_SCHEDULE_MODIFIER_simd:
3472     if (Schedule == OMP_sch_static_chunked)
3473       Schedule = OMP_sch_static_balanced_chunked;
3474     break;
3475   case OMPC_SCHEDULE_MODIFIER_last:
3476   case OMPC_SCHEDULE_MODIFIER_unknown:
3477     break;
3478   }
3479   return Schedule | Modifier;
3480 }
3481 
/// Emit the dynamic-dispatch loop initialization call for a worksharing
/// loop. \param IVSize/\p IVSigned select the 32/64-bit (un)signed runtime
/// entry point; \p DispatchValues carries the LB/UB/Chunk values.
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  // Map the clause schedule (plus chunkedness and orderedness) to the
  // runtime schedule enumeration.
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  // Unordered static schedules are handled by the static-init path instead;
  // only ordered loops may reach here with a static schedule.
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  // NOTE: argument order below matches the runtime ABI; do not reorder.
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                // Lower
      DispatchValues.UB,                                // Upper
      CGF.Builder.getIntN(IVSize, 1),                   // Stride
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}
3513 
/// Emit a call to the selected __kmpc_for_static_init_* entry point.
/// \param UpdateLocation ident_t* value describing the source location.
/// \param ThreadId Global thread id of the encountering thread.
/// \param ForStaticInitFunction Runtime callee matching Values.IVSize and
///        Values.IVSigned.
/// \param Schedule Must be one of the static schedule kinds (asserted).
/// \param M1, M2 Schedule modifiers folded into the schedule argument.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  // Ordered loops never take the static-init path.
  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  // NOTE: argument order below matches the runtime ABI; do not reorder.
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}
3562 
3563 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
3564                                         SourceLocation Loc,
3565                                         OpenMPDirectiveKind DKind,
3566                                         const OpenMPScheduleTy &ScheduleKind,
3567                                         const StaticRTInput &Values) {
3568   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
3569       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
3570   assert(isOpenMPWorksharingDirective(DKind) &&
3571          "Expected loop-based or sections-based directive.");
3572   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
3573                                              isOpenMPLoopDirective(DKind)
3574                                                  ? OMP_IDENT_WORK_LOOP
3575                                                  : OMP_IDENT_WORK_SECTIONS);
3576   llvm::Value *ThreadId = getThreadID(CGF, Loc);
3577   llvm::FunctionCallee StaticInitFunction =
3578       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3579   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3580                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
3581 }
3582 
3583 void CGOpenMPRuntime::emitDistributeStaticInit(
3584     CodeGenFunction &CGF, SourceLocation Loc,
3585     OpenMPDistScheduleClauseKind SchedKind,
3586     const CGOpenMPRuntime::StaticRTInput &Values) {
3587   OpenMPSchedType ScheduleNum =
3588       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
3589   llvm::Value *UpdatedLocation =
3590       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
3591   llvm::Value *ThreadId = getThreadID(CGF, Loc);
3592   llvm::FunctionCallee StaticInitFunction =
3593       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3594   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3595                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
3596                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
3597 }
3598 
3599 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
3600                                           SourceLocation Loc,
3601                                           OpenMPDirectiveKind DKind) {
3602   if (!CGF.HaveInsertPoint())
3603     return;
3604   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
3605   llvm::Value *Args[] = {
3606       emitUpdateLocation(CGF, Loc,
3607                          isOpenMPDistributeDirective(DKind)
3608                              ? OMP_IDENT_WORK_DISTRIBUTE
3609                              : isOpenMPLoopDirective(DKind)
3610                                    ? OMP_IDENT_WORK_LOOP
3611                                    : OMP_IDENT_WORK_SECTIONS),
3612       getThreadID(CGF, Loc)};
3613   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
3614                       Args);
3615 }
3616 
3617 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
3618                                                  SourceLocation Loc,
3619                                                  unsigned IVSize,
3620                                                  bool IVSigned) {
3621   if (!CGF.HaveInsertPoint())
3622     return;
3623   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
3624   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3625   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
3626 }
3627 
3628 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
3629                                           SourceLocation Loc, unsigned IVSize,
3630                                           bool IVSigned, Address IL,
3631                                           Address LB, Address UB,
3632                                           Address ST) {
3633   // Call __kmpc_dispatch_next(
3634   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
3635   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
3636   //          kmp_int[32|64] *p_stride);
3637   llvm::Value *Args[] = {
3638       emitUpdateLocation(CGF, Loc),
3639       getThreadID(CGF, Loc),
3640       IL.getPointer(), // &isLastIter
3641       LB.getPointer(), // &Lower
3642       UB.getPointer(), // &Upper
3643       ST.getPointer()  // &Stride
3644   };
3645   llvm::Value *Call =
3646       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
3647   return CGF.EmitScalarConversion(
3648       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
3649       CGF.getContext().BoolTy, Loc);
3650 }
3651 
3652 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
3653                                            llvm::Value *NumThreads,
3654                                            SourceLocation Loc) {
3655   if (!CGF.HaveInsertPoint())
3656     return;
3657   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
3658   llvm::Value *Args[] = {
3659       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3660       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
3661   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
3662                       Args);
3663 }
3664 
3665 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
3666                                          OpenMPProcBindClauseKind ProcBind,
3667                                          SourceLocation Loc) {
3668   if (!CGF.HaveInsertPoint())
3669     return;
3670   // Constants for proc bind value accepted by the runtime.
3671   enum ProcBindTy {
3672     ProcBindFalse = 0,
3673     ProcBindTrue,
3674     ProcBindMaster,
3675     ProcBindClose,
3676     ProcBindSpread,
3677     ProcBindIntel,
3678     ProcBindDefault
3679   } RuntimeProcBind;
3680   switch (ProcBind) {
3681   case OMPC_PROC_BIND_master:
3682     RuntimeProcBind = ProcBindMaster;
3683     break;
3684   case OMPC_PROC_BIND_close:
3685     RuntimeProcBind = ProcBindClose;
3686     break;
3687   case OMPC_PROC_BIND_spread:
3688     RuntimeProcBind = ProcBindSpread;
3689     break;
3690   case OMPC_PROC_BIND_unknown:
3691     llvm_unreachable("Unsupported proc_bind value.");
3692   }
3693   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
3694   llvm::Value *Args[] = {
3695       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3696       llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
3697   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
3698 }
3699 
3700 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
3701                                 SourceLocation Loc) {
3702   if (!CGF.HaveInsertPoint())
3703     return;
3704   // Build call void __kmpc_flush(ident_t *loc)
3705   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
3706                       emitUpdateLocation(CGF, Loc));
3707 }
3708 
namespace {
/// Indexes of fields for type kmp_task_t.
/// NOTE(review): the order of these enumerators must mirror the kmp_task_t
/// record layout built elsewhere in this file — verify before reordering.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  /// (Named after the runtime's generic data1 field.)
  Data1,
  /// Task priority.
  /// (Named after the runtime's generic data2 field.)
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
3734 
3735 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3736   return OffloadEntriesTargetRegion.empty() &&
3737          OffloadEntriesDeviceGlobalVar.empty();
3738 }
3739 
3740 /// Initialize target region entry.
3741 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3742     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3743                                     StringRef ParentName, unsigned LineNum,
3744                                     unsigned Order) {
3745   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3746                                              "only required for the device "
3747                                              "code generation.");
3748   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3749       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3750                                    OMPTargetRegionEntryTargetRegion);
3751   ++OffloadingEntriesNum;
3752 }
3753 
/// Register a target region entry: on the device, complete a pre-initialized
/// placeholder with its address/ID; on the host, create a brand-new entry.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
      // No matching uninitialized entry exists — host and device code are out
      // of sync (or the entry was registered twice). Emit a diagnostic rather
      // than asserting so users get an actionable error.
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Unable to find target region on line '%0' in the device code.");
      CGM.getDiags().Report(DiagID) << LineNum;
      return;
    }
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    assert(Entry.isValid() && "Entry not initialized!");
    // Complete the placeholder with the now-known address and ID.
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    // Host path: create a fresh entry whose order is the running entry count.
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}
3781 
3782 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3783     unsigned DeviceID, unsigned FileID, StringRef ParentName,
3784     unsigned LineNum) const {
3785   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3786   if (PerDevice == OffloadEntriesTargetRegion.end())
3787     return false;
3788   auto PerFile = PerDevice->second.find(FileID);
3789   if (PerFile == PerDevice->second.end())
3790     return false;
3791   auto PerParentName = PerFile->second.find(ParentName);
3792   if (PerParentName == PerFile->second.end())
3793     return false;
3794   auto PerLine = PerParentName->second.find(LineNum);
3795   if (PerLine == PerParentName->second.end())
3796     return false;
3797   // Fail if this entry is already registered.
3798   if (PerLine->second.getAddress() || PerLine->second.getID())
3799     return false;
3800   return true;
3801 }
3802 
3803 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3804     const OffloadTargetRegionEntryInfoActTy &Action) {
3805   // Scan all target region entries and perform the provided action.
3806   for (const auto &D : OffloadEntriesTargetRegion)
3807     for (const auto &F : D.second)
3808       for (const auto &P : F.second)
3809         for (const auto &L : P.second)
3810           Action(D.first, F.first, P.first(), L.first, L.second);
3811 }
3812 
3813 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3814     initializeDeviceGlobalVarEntryInfo(StringRef Name,
3815                                        OMPTargetGlobalVarEntryKind Flags,
3816                                        unsigned Order) {
3817   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3818                                              "only required for the device "
3819                                              "code generation.");
3820   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3821   ++OffloadingEntriesNum;
3822 }
3823 
/// Register a device global variable entry: on the device, complete the
/// pre-initialized placeholder; on the host, update an existing entry or
/// create a new one.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // NOTE(review): operator[] default-constructs an entry if the name was
    // never initialized; the asserts below are then the only guard — confirm
    // initialization always precedes registration on the device.
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    assert(Entry.isValid() && Entry.getFlags() == Flags &&
           "Entry not initialized!");
    assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
           "Resetting with the new address.");
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      // Already registered with the same address: only fill in a size/linkage
      // that was previously unknown.
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    // First registration: complete the placeholder.
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      // Host entry already exists: only fill in a size/linkage that was
      // previously unknown; the address must not change.
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
             "Resetting with the new address.");
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    // Host path: create a fresh, fully-populated entry.
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}
3863 
3864 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3865     actOnDeviceGlobalVarEntriesInfo(
3866         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3867   // Scan all target region entries and perform the provided action.
3868   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3869     Action(E.getKey(), E.getValue());
3870 }
3871 
/// Create the host-side registration machinery for this module's offloading
/// descriptor: the begin/end entry-section symbols, the per-device image
/// array, the descriptor itself, and the registration/unregistration
/// functions. Returns the registration function (to be run at startup), or
/// null when nothing needs registering (device compilation or no entries).
llvm::Function *
CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
    return nullptr;

  llvm::Module &M = CGM.getModule();
  ASTContext &C = CGM.getContext();

  // Get list of devices we care about
  const std::vector<llvm::Triple> &Devices = CGM.getLangOpts().OMPTargetTriples;

  // We should be creating an offloading descriptor only if there are devices
  // specified.
  assert(!Devices.empty() && "No OpenMP offloading devices??");

  // Create the external variables that will point to the begin and end of the
  // host entries section. These will be defined by the linker.
  llvm::Type *OffloadEntryTy =
      CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
  std::string EntriesBeginName = getName({"omp_offloading", "entries_begin"});
  auto *HostEntriesBegin = new llvm::GlobalVariable(
      M, OffloadEntryTy, /*isConstant=*/true,
      llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
      EntriesBeginName);
  std::string EntriesEndName = getName({"omp_offloading", "entries_end"});
  auto *HostEntriesEnd =
      new llvm::GlobalVariable(M, OffloadEntryTy, /*isConstant=*/true,
                               llvm::GlobalValue::ExternalLinkage,
                               /*Initializer=*/nullptr, EntriesEndName);

  // Create all device images
  auto *DeviceImageTy = cast<llvm::StructType>(
      CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));
  ConstantInitBuilder DeviceImagesBuilder(CGM);
  ConstantArrayBuilder DeviceImagesEntries =
      DeviceImagesBuilder.beginArray(DeviceImageTy);

  for (const llvm::Triple &Device : Devices) {
    StringRef T = Device.getTriple();
    // Per-device image begin/end symbols carry the target triple as a name
    // suffix; weak linkage lets them resolve to null when the image is
    // absent.
    std::string BeginName = getName({"omp_offloading", "img_start", ""});
    auto *ImgBegin = new llvm::GlobalVariable(
        M, CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::ExternalWeakLinkage,
        /*Initializer=*/nullptr, Twine(BeginName).concat(T));
    std::string EndName = getName({"omp_offloading", "img_end", ""});
    auto *ImgEnd = new llvm::GlobalVariable(
        M, CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::ExternalWeakLinkage,
        /*Initializer=*/nullptr, Twine(EndName).concat(T));

    llvm::Constant *Data[] = {ImgBegin, ImgEnd, HostEntriesBegin,
                              HostEntriesEnd};
    createConstantGlobalStructAndAddToParent(CGM, getTgtDeviceImageQTy(), Data,
                                             DeviceImagesEntries);
  }

  // Create device images global array.
  std::string ImagesName = getName({"omp_offloading", "device_images"});
  llvm::GlobalVariable *DeviceImages =
      DeviceImagesEntries.finishAndCreateGlobal(ImagesName,
                                                CGM.getPointerAlign(),
                                                /*isConstant=*/true);
  DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  // This is a Zero array to be used in the creation of the constant expressions
  llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
                             llvm::Constant::getNullValue(CGM.Int32Ty)};

  // Create the target region descriptor: device count, pointer to the first
  // device image, and the host entry section bounds.
  llvm::Constant *Data[] = {
      llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()),
      llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(),
                                           DeviceImages, Index),
      HostEntriesBegin, HostEntriesEnd};
  std::string Descriptor = getName({"omp_offloading", "descriptor"});
  llvm::GlobalVariable *Desc = createGlobalStruct(
      CGM, getTgtBinaryDescriptorQTy(), /*IsConstant=*/true, Data, Descriptor);

  // Emit code to register or unregister the descriptor at execution
  // startup or closing, respectively.

  // Build the unregistration function first: the registration function
  // installs it as a global destructor below.
  llvm::Function *UnRegFn;
  {
    FunctionArgList Args;
    ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other);
    Args.push_back(&DummyPtr);

    CodeGenFunction CGF(CGM);
    // Disable debug info for global (de-)initializer because they are not part
    // of some particular construct.
    CGF.disableDebugInfo();
    const auto &FI =
        CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string UnregName = getName({"omp_offloading", "descriptor_unreg"});
    UnRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, UnregName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, UnRegFn, FI, Args);
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
                        Desc);
    CGF.FinishFunction();
  }
  llvm::Function *RegFn;
  {
    CodeGenFunction CGF(CGM);
    // Disable debug info for global (de-)initializer because they are not part
    // of some particular construct.
    CGF.disableDebugInfo();
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);

    // Encode offload target triples into the registration function name. It
    // will serve as a comdat key for the registration/unregistration code for
    // this particular combination of offloading targets.
    SmallVector<StringRef, 4U> RegFnNameParts(Devices.size() + 2U);
    RegFnNameParts[0] = "omp_offloading";
    RegFnNameParts[1] = "descriptor_reg";
    llvm::transform(Devices, std::next(RegFnNameParts.begin(), 2),
                    [](const llvm::Triple &T) -> const std::string& {
                      return T.getTriple();
                    });
    // Sort the triple parts so the comdat key does not depend on the order in
    // which the offload targets were specified.
    llvm::sort(std::next(RegFnNameParts.begin(), 2), RegFnNameParts.end());
    std::string Descriptor = getName(RegFnNameParts);
    RegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, Descriptor, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList());
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc);
    // Create a variable to drive the registration and unregistration of the
    // descriptor, so we can reuse the logic that emits Ctors and Dtors.
    ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(),
                                  SourceLocation(), nullptr, C.CharTy,
                                  ImplicitParamDecl::Other);
    CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
    CGF.FinishFunction();
  }
  if (CGM.supportsCOMDAT()) {
    // It is sufficient to call registration function only once, so create a
    // COMDAT group for registration/unregistration functions and associated
    // data. That would reduce startup time and code size. Registration
    // function serves as a COMDAT group key.
    llvm::Comdat *ComdatKey = M.getOrInsertComdat(RegFn->getName());
    RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
    RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility);
    RegFn->setComdat(ComdatKey);
    UnRegFn->setComdat(ComdatKey);
    DeviceImages->setComdat(ComdatKey);
    Desc->setComdat(ComdatKey);
  }
  return RegFn;
}
4022 
4023 void CGOpenMPRuntime::createOffloadEntry(
4024     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
4025     llvm::GlobalValue::LinkageTypes Linkage) {
4026   StringRef Name = Addr->getName();
4027   llvm::Module &M = CGM.getModule();
4028   llvm::LLVMContext &C = M.getContext();
4029 
4030   // Create constant string with the name.
4031   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
4032 
4033   std::string StringName = getName({"omp_offloading", "entry_name"});
4034   auto *Str = new llvm::GlobalVariable(
4035       M, StrPtrInit->getType(), /*isConstant=*/true,
4036       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
4037   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
4038 
4039   llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
4040                             llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
4041                             llvm::ConstantInt::get(CGM.SizeTy, Size),
4042                             llvm::ConstantInt::get(CGM.Int32Ty, Flags),
4043                             llvm::ConstantInt::get(CGM.Int32Ty, 0)};
4044   std::string EntryName = getName({"omp_offloading", "entry", ""});
4045   llvm::GlobalVariable *Entry = createGlobalStruct(
4046       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
4047       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
4048 
4049   // The entry has to be created in the section the linker expects it to be.
4050   std::string Section = getName({"omp_offloading", "entries"});
4051   Entry->setSection(Section);
4052 }
4053 
4054 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
4055   // Emit the offloading entries and metadata so that the device codegen side
4056   // can easily figure out what to emit. The produced metadata looks like
4057   // this:
4058   //
4059   // !omp_offload.info = !{!1, ...}
4060   //
4061   // Right now we only generate metadata for function that contain target
4062   // regions.
4063 
4064   // If we do not have entries, we don't need to do anything.
4065   if (OffloadEntriesInfoManager.empty())
4066     return;
4067 
4068   llvm::Module &M = CGM.getModule();
4069   llvm::LLVMContext &C = M.getContext();
4070   SmallVector<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
4071       OrderedEntries(OffloadEntriesInfoManager.size());
4072   llvm::SmallVector<StringRef, 16> ParentFunctions(
4073       OffloadEntriesInfoManager.size());
4074 
4075   // Auxiliary methods to create metadata values and strings.
4076   auto &&GetMDInt = [this](unsigned V) {
4077     return llvm::ConstantAsMetadata::get(
4078         llvm::ConstantInt::get(CGM.Int32Ty, V));
4079   };
4080 
4081   auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
4082 
4083   // Create the offloading info metadata node.
4084   llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
4085 
4086   // Create function that emits metadata for each target region entry;
4087   auto &&TargetRegionMetadataEmitter =
4088       [&C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, &GetMDString](
4089           unsigned DeviceID, unsigned FileID, StringRef ParentName,
4090           unsigned Line,
4091           const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
4092         // Generate metadata for target regions. Each entry of this metadata
4093         // contains:
4094         // - Entry 0 -> Kind of this type of metadata (0).
4095         // - Entry 1 -> Device ID of the file where the entry was identified.
4096         // - Entry 2 -> File ID of the file where the entry was identified.
4097         // - Entry 3 -> Mangled name of the function where the entry was
4098         // identified.
4099         // - Entry 4 -> Line in the file where the entry was identified.
4100         // - Entry 5 -> Order the entry was created.
4101         // The first element of the metadata node is the kind.
4102         llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
4103                                  GetMDInt(FileID),      GetMDString(ParentName),
4104                                  GetMDInt(Line),        GetMDInt(E.getOrder())};
4105 
4106         // Save this entry in the right position of the ordered entries array.
4107         OrderedEntries[E.getOrder()] = &E;
4108         ParentFunctions[E.getOrder()] = ParentName;
4109 
4110         // Add metadata to the named metadata node.
4111         MD->addOperand(llvm::MDNode::get(C, Ops));
4112       };
4113 
4114   OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
4115       TargetRegionMetadataEmitter);
4116 
4117   // Create function that emits metadata for each device global variable entry;
4118   auto &&DeviceGlobalVarMetadataEmitter =
4119       [&C, &OrderedEntries, &GetMDInt, &GetMDString,
4120        MD](StringRef MangledName,
4121            const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
4122                &E) {
4123         // Generate metadata for global variables. Each entry of this metadata
4124         // contains:
4125         // - Entry 0 -> Kind of this type of metadata (1).
4126         // - Entry 1 -> Mangled name of the variable.
4127         // - Entry 2 -> Declare target kind.
4128         // - Entry 3 -> Order the entry was created.
4129         // The first element of the metadata node is the kind.
4130         llvm::Metadata *Ops[] = {
4131             GetMDInt(E.getKind()), GetMDString(MangledName),
4132             GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
4133 
4134         // Save this entry in the right position of the ordered entries array.
4135         OrderedEntries[E.getOrder()] = &E;
4136 
4137         // Add metadata to the named metadata node.
4138         MD->addOperand(llvm::MDNode::get(C, Ops));
4139       };
4140 
4141   OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
4142       DeviceGlobalVarMetadataEmitter);
4143 
4144   for (const auto *E : OrderedEntries) {
4145     assert(E && "All ordered entries must exist!");
4146     if (const auto *CE =
4147             dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
4148                 E)) {
4149       if (!CE->getID() || !CE->getAddress()) {
4150         // Do not blame the entry if the parent funtion is not emitted.
4151         StringRef FnName = ParentFunctions[CE->getOrder()];
4152         if (!CGM.GetGlobalValue(FnName))
4153           continue;
4154         unsigned DiagID = CGM.getDiags().getCustomDiagID(
4155             DiagnosticsEngine::Error,
4156             "Offloading entry for target region is incorrect: either the "
4157             "address or the ID is invalid.");
4158         CGM.getDiags().Report(DiagID);
4159         continue;
4160       }
4161       createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
4162                          CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
4163     } else if (const auto *CE =
4164                    dyn_cast<OffloadEntriesInfoManagerTy::
4165                                 OffloadEntryInfoDeviceGlobalVar>(E)) {
4166       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
4167           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
4168               CE->getFlags());
4169       switch (Flags) {
4170       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
4171         if (!CE->getAddress()) {
4172           unsigned DiagID = CGM.getDiags().getCustomDiagID(
4173               DiagnosticsEngine::Error,
4174               "Offloading entry for declare target variable is incorrect: the "
4175               "address is invalid.");
4176           CGM.getDiags().Report(DiagID);
4177           continue;
4178         }
4179         // The vaiable has no definition - no need to add the entry.
4180         if (CE->getVarSize().isZero())
4181           continue;
4182         break;
4183       }
4184       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
4185         assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
4186                 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
4187                "Declaret target link address is set.");
4188         if (CGM.getLangOpts().OpenMPIsDevice)
4189           continue;
4190         if (!CE->getAddress()) {
4191           unsigned DiagID = CGM.getDiags().getCustomDiagID(
4192               DiagnosticsEngine::Error,
4193               "Offloading entry for declare target variable is incorrect: the "
4194               "address is invalid.");
4195           CGM.getDiags().Report(DiagID);
4196           continue;
4197         }
4198         break;
4199       }
4200       createOffloadEntry(CE->getAddress(), CE->getAddress(),
4201                          CE->getVarSize().getQuantity(), Flags,
4202                          CE->getLinkage());
4203     } else {
4204       llvm_unreachable("Unsupported entry kind.");
4205     }
4206   }
4207 }
4208 
/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host IR into a module owned by a local context; we only need to
  // read the named metadata out of it.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Helpers to read operand Idx of this node as an integer or a string.
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 is the entry kind; the meaning of the remaining operands
    // depends on it (see createOffloadEntriesAndInfoMetadata()).
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
4277 
4278 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
4279   if (!KmpRoutineEntryPtrTy) {
4280     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
4281     ASTContext &C = CGM.getContext();
4282     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
4283     FunctionProtoType::ExtProtoInfo EPI;
4284     KmpRoutineEntryPtrQTy = C.getPointerType(
4285         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
4286     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
4287   }
4288 }
4289 
4290 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
4291   // Make sure the type of the entry is already created. This is the type we
4292   // have to create:
4293   // struct __tgt_offload_entry{
4294   //   void      *addr;       // Pointer to the offload entry info.
4295   //                          // (function or global)
4296   //   char      *name;       // Name of the function or global.
4297   //   size_t     size;       // Size of the entry info (0 if it a function).
4298   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
4299   //   int32_t    reserved;   // Reserved, to use by the runtime library.
4300   // };
4301   if (TgtOffloadEntryQTy.isNull()) {
4302     ASTContext &C = CGM.getContext();
4303     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
4304     RD->startDefinition();
4305     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4306     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
4307     addFieldToRecordDecl(C, RD, C.getSizeType());
4308     addFieldToRecordDecl(
4309         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4310     addFieldToRecordDecl(
4311         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4312     RD->completeDefinition();
4313     RD->addAttr(PackedAttr::CreateImplicit(C));
4314     TgtOffloadEntryQTy = C.getRecordType(RD);
4315   }
4316   return TgtOffloadEntryQTy;
4317 }
4318 
4319 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
4320   // These are the types we need to build:
4321   // struct __tgt_device_image{
4322   // void   *ImageStart;       // Pointer to the target code start.
4323   // void   *ImageEnd;         // Pointer to the target code end.
4324   // // We also add the host entries to the device image, as it may be useful
4325   // // for the target runtime to have access to that information.
4326   // __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all
4327   //                                       // the entries.
4328   // __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
4329   //                                       // entries (non inclusive).
4330   // };
4331   if (TgtDeviceImageQTy.isNull()) {
4332     ASTContext &C = CGM.getContext();
4333     RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image");
4334     RD->startDefinition();
4335     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4336     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4337     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4338     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4339     RD->completeDefinition();
4340     TgtDeviceImageQTy = C.getRecordType(RD);
4341   }
4342   return TgtDeviceImageQTy;
4343 }
4344 
4345 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
4346   // struct __tgt_bin_desc{
4347   //   int32_t              NumDevices;      // Number of devices supported.
4348   //   __tgt_device_image   *DeviceImages;   // Arrays of device images
4349   //                                         // (one per device).
4350   //   __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all the
4351   //                                         // entries.
4352   //   __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
4353   //                                         // entries (non inclusive).
4354   // };
4355   if (TgtBinaryDescriptorQTy.isNull()) {
4356     ASTContext &C = CGM.getContext();
4357     RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc");
4358     RD->startDefinition();
4359     addFieldToRecordDecl(
4360         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4361     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
4362     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4363     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4364     RD->completeDefinition();
4365     TgtBinaryDescriptorQTy = C.getRecordType(RD);
4366   }
4367   return TgtBinaryDescriptorQTy;
4368 }
4369 
4370 namespace {
4371 struct PrivateHelpersTy {
4372   PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
4373                    const VarDecl *PrivateElemInit)
4374       : Original(Original), PrivateCopy(PrivateCopy),
4375         PrivateElemInit(PrivateElemInit) {}
4376   const VarDecl *Original;
4377   const VarDecl *PrivateCopy;
4378   const VarDecl *PrivateElemInit;
4379 };
4380 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
4381 } // anonymous namespace
4382 
4383 static RecordDecl *
4384 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
4385   if (!Privates.empty()) {
4386     ASTContext &C = CGM.getContext();
4387     // Build struct .kmp_privates_t. {
4388     //         /*  private vars  */
4389     //       };
4390     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
4391     RD->startDefinition();
4392     for (const auto &Pair : Privates) {
4393       const VarDecl *VD = Pair.second.Original;
4394       QualType Type = VD->getType().getNonReferenceType();
4395       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
4396       if (VD->hasAttrs()) {
4397         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
4398              E(VD->getAttrs().end());
4399              I != E; ++I)
4400           FD->addAttr(*I);
4401       }
4402     }
4403     RD->completeDefinition();
4404     return RD;
4405   }
4406   return nullptr;
4407 }
4408 
4409 static RecordDecl *
4410 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
4411                          QualType KmpInt32Ty,
4412                          QualType KmpRoutineEntryPointerQTy) {
4413   ASTContext &C = CGM.getContext();
4414   // Build struct kmp_task_t {
4415   //         void *              shareds;
4416   //         kmp_routine_entry_t routine;
4417   //         kmp_int32           part_id;
4418   //         kmp_cmplrdata_t data1;
4419   //         kmp_cmplrdata_t data2;
4420   // For taskloops additional fields:
4421   //         kmp_uint64          lb;
4422   //         kmp_uint64          ub;
4423   //         kmp_int64           st;
4424   //         kmp_int32           liter;
4425   //         void *              reductions;
4426   //       };
4427   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
4428   UD->startDefinition();
4429   addFieldToRecordDecl(C, UD, KmpInt32Ty);
4430   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
4431   UD->completeDefinition();
4432   QualType KmpCmplrdataTy = C.getRecordType(UD);
4433   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
4434   RD->startDefinition();
4435   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4436   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
4437   addFieldToRecordDecl(C, RD, KmpInt32Ty);
4438   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4439   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4440   if (isOpenMPTaskLoopDirective(Kind)) {
4441     QualType KmpUInt64Ty =
4442         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
4443     QualType KmpInt64Ty =
4444         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
4445     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4446     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4447     addFieldToRecordDecl(C, RD, KmpInt64Ty);
4448     addFieldToRecordDecl(C, RD, KmpInt32Ty);
4449     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4450   }
4451   RD->completeDefinition();
4452   return RD;
4453 }
4454 
4455 static RecordDecl *
4456 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
4457                                      ArrayRef<PrivateDataTy> Privates) {
4458   ASTContext &C = CGM.getContext();
4459   // Build struct kmp_task_t_with_privates {
4460   //         kmp_task_t task_data;
4461   //         .kmp_privates_t. privates;
4462   //       };
4463   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
4464   RD->startDefinition();
4465   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
4466   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
4467     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
4468   RD->completeDefinition();
4469   return RD;
4470 }
4471 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Build the signature kmp_int32 (kmp_int32 gtid,
  // kmp_task_t_with_privates *restrict tt) expected for a task entry point.
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // TDBase is the kmp_task_t_with_privates object; Base is its embedded
  // kmp_task_t (first field).
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address (pointer to the field), not by value.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer();

  // Load the shareds pointer and cast it to the pointer type the task
  // function expects.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // Pass the address of the privates block as void*, or a null pointer when
  // the task has no privates (the second field is absent in that case).
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloop entry points additionally receive lb, ub, st, liter and the
    // reductions pointer, all loaded from the kmp_task_t fields.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The proxy always returns 0 to the runtime.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
4586 
/// Emit a function that destroys all destructible fields of the privates
/// block of a kmp_task_t_with_privates instance. It has the same
/// kmp_int32 (kmp_int32 gtid, kmp_task_t_with_privates *tt) signature as the
/// task entry proxy.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  // Navigate to the privates block: the second field of
  // kmp_task_t_with_privates.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  // For each field that needs destruction, push a cleanup so it is destroyed
  // on function exit (emitted by FinishFunction).
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
4635 
4636 /// Emit a privates mapping function for correct handling of private and
4637 /// firstprivate variables.
4638 /// \code
4639 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
4640 /// **noalias priv1,...,  <tyn> **noalias privn) {
4641 ///   *priv1 = &.privates.priv1;
4642 ///   ...;
4643 ///   *privn = &.privates.privn;
4644 /// }
4645 /// \endcode
4646 static llvm::Value *
4647 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
4648                                ArrayRef<const Expr *> PrivateVars,
4649                                ArrayRef<const Expr *> FirstprivateVars,
4650                                ArrayRef<const Expr *> LastprivateVars,
4651                                QualType PrivatesQTy,
4652                                ArrayRef<PrivateDataTy> Privates) {
4653   ASTContext &C = CGM.getContext();
4654   FunctionArgList Args;
4655   ImplicitParamDecl TaskPrivatesArg(
4656       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4657       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
4658       ImplicitParamDecl::Other);
4659   Args.push_back(&TaskPrivatesArg);
4660   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
4661   unsigned Counter = 1;
4662   for (const Expr *E : PrivateVars) {
4663     Args.push_back(ImplicitParamDecl::Create(
4664         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4665         C.getPointerType(C.getPointerType(E->getType()))
4666             .withConst()
4667             .withRestrict(),
4668         ImplicitParamDecl::Other));
4669     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4670     PrivateVarsPos[VD] = Counter;
4671     ++Counter;
4672   }
4673   for (const Expr *E : FirstprivateVars) {
4674     Args.push_back(ImplicitParamDecl::Create(
4675         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4676         C.getPointerType(C.getPointerType(E->getType()))
4677             .withConst()
4678             .withRestrict(),
4679         ImplicitParamDecl::Other));
4680     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4681     PrivateVarsPos[VD] = Counter;
4682     ++Counter;
4683   }
4684   for (const Expr *E : LastprivateVars) {
4685     Args.push_back(ImplicitParamDecl::Create(
4686         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4687         C.getPointerType(C.getPointerType(E->getType()))
4688             .withConst()
4689             .withRestrict(),
4690         ImplicitParamDecl::Other));
4691     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4692     PrivateVarsPos[VD] = Counter;
4693     ++Counter;
4694   }
4695   const auto &TaskPrivatesMapFnInfo =
4696       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4697   llvm::FunctionType *TaskPrivatesMapTy =
4698       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
4699   std::string Name =
4700       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
4701   auto *TaskPrivatesMap = llvm::Function::Create(
4702       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
4703       &CGM.getModule());
4704   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
4705                                     TaskPrivatesMapFnInfo);
4706   if (CGM.getLangOpts().Optimize) {
4707     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
4708     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
4709     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
4710   }
4711   CodeGenFunction CGF(CGM);
4712   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
4713                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
4714 
4715   // *privi = &.privates.privi;
4716   LValue Base = CGF.EmitLoadOfPointerLValue(
4717       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
4718       TaskPrivatesArg.getType()->castAs<PointerType>());
4719   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
4720   Counter = 0;
4721   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
4722     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
4723     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
4724     LValue RefLVal =
4725         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
4726     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
4727         RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
4728     CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
4729     ++Counter;
4730   }
4731   CGF.FinishFunction();
4732   return TaskPrivatesMap;
4733 }
4734 
/// Emit initialization for private variables in task-based directives.
/// \param D The task-based directive being processed.
/// \param KmpTaskSharedsPtr Address of the task's shareds record; may be
/// invalid when there is nothing to read shared values from.
/// \param TDBase LValue of the kmp_task_t_with_privates object.
/// \param KmpTaskTWithPrivatesQTyRD Record decl whose second field is the
/// .privates. record.
/// \param SharedsTy Type of the captured shareds record.
/// \param SharedsPtrTy Pointer-to-SharedsTy type.
/// \param Data Clause-related data for the directive.
/// \param Privates Privates list; iterated in parallel with the fields of
/// the .privates. record, so the orders must match.
/// \param ForDup true when called from the task duplication function
/// (taskloop); then only non-trivial constructor initializers are re-emitted.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // Skip the leading kmp_task_t field to reach the .privates. record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  // The captured statement supplies the field decls used to locate the
  // shared copies of the original variables.
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Walk the fields of the .privates. record in lock-step with Privates.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // When duplicating (ForDup), re-run only non-trivial constructor calls.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        // Firstprivate: initialize from the corresponding shared variable.
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else {
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          // Re-wrap using the alignment of the original declaration.
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array firstprivate: privatize the init element so it refers
          // to the shared copy, then emit the initializer once.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
            return SharedRefLValue.getAddress();
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private/lastprivate copy: just run its own initializer.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
4839 
4840 /// Check if duplication function is required for taskloops.
4841 static bool checkInitIsRequired(CodeGenFunction &CGF,
4842                                 ArrayRef<PrivateDataTy> Privates) {
4843   bool InitRequired = false;
4844   for (const PrivateDataTy &Pair : Privates) {
4845     const VarDecl *VD = Pair.second.PrivateCopy;
4846     const Expr *Init = VD->getAnyInitializer();
4847     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
4848                                     !CGF.isTrivialInitializer(Init));
4849     if (InitRequired)
4850       break;
4851   }
4852   return InitRequired;
4853 }
4854 
4855 
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
/// \param WithLastIter true if the lastprivate ("liter") flag of the
/// destination task must be set from the lastpriv argument.
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Arguments: task_dst, task_src (both kmp_task_t_with_privates*) and the
  // int lastpriv flag.
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  // Load the destination task object from the first argument.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    // Load task_src->shareds so firstprivate initial values can be read
    // from the source task.
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
4934 
4935 /// Checks if destructor function is required to be generated.
4936 /// \return true if cleanups are required, false otherwise.
4937 static bool
4938 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
4939   bool NeedsCleanup = false;
4940   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4941   const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
4942   for (const FieldDecl *FD : PrivateRD->fields()) {
4943     NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
4944     if (NeedsCleanup)
4945       break;
4946   }
4947   return NeedsCleanup;
4948 }
4949 
/// Set up a task for a task-based directive: allocates the task object via
/// __kmpc_omp_task_alloc, copies the captured shareds into it, emits initial
/// values for private copies, and fills in the destructor and priority data.
/// \param D Task-based directive (task, taskloop or a target-based one).
/// \param TaskFunction The outlined function that executes the task body.
/// \param SharedsTy Type of the record with the captured shareds.
/// \param Shareds Address of the captured shareds on the caller side.
/// \param Data Clause-related data for the directive.
/// \return TaskResultTy with the new task, the proxy entry function and the
/// lvalue/decl needed by callers to finish emitting the task.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  auto I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates also carry the element-init variable used to reference the
  // shared copy during initialization.
  I = Data.FirstprivateCopies.begin();
  auto IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  // Lastprivate copies carry no element initializer.
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Sort by decreasing alignment; stable so that equally aligned privates
  // keep their source order (emitPrivatesInit relies on matching order).
  llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
    return L.first > R.first;
  });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloop and task use distinct
  // cached record types.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  // LLVM IR types and total size of the task record, needed for the alloc
  // call and for casting the allocated task.
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Build the privates mapping function (if there are privates) and cast it
  // to the type of the 4th parameter of the task entry function.
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  // Set the destructors flag only if some private copy actually needs
  // cleanup; it makes the runtime call the destructor thunk emitted below.
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  // The final flag is either selected at run time (final clause with a
  // non-constant condition) or folded to a constant.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
                              getThreadID(CGF, Loc), TaskFlags,
                              KmpTaskTWithPrivatesTySize, SharedsSize,
                              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                                  TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops need a task_dup function when there are lastprivates or when
    // some private copy requires non-trivial initialization.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
5152 
5153 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
5154                                    const OMPExecutableDirective &D,
5155                                    llvm::Function *TaskFunction,
5156                                    QualType SharedsTy, Address Shareds,
5157                                    const Expr *IfCond,
5158                                    const OMPTaskDataTy &Data) {
5159   if (!CGF.HaveInsertPoint())
5160     return;
5161 
5162   TaskResultTy Result =
5163       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5164   llvm::Value *NewTask = Result.NewTask;
5165   llvm::Function *TaskEntry = Result.TaskEntry;
5166   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
5167   LValue TDBase = Result.TDBase;
5168   const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
5169   ASTContext &C = CGM.getContext();
5170   // Process list of dependences.
5171   Address DependenciesArray = Address::invalid();
5172   unsigned NumDependencies = Data.Dependences.size();
5173   if (NumDependencies) {
5174     // Dependence kind for RTL.
5175     enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3, DepMutexInOutSet = 0x4 };
5176     enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
5177     RecordDecl *KmpDependInfoRD;
5178     QualType FlagsTy =
5179         C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
5180     llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5181     if (KmpDependInfoTy.isNull()) {
5182       KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
5183       KmpDependInfoRD->startDefinition();
5184       addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
5185       addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
5186       addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
5187       KmpDependInfoRD->completeDefinition();
5188       KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
5189     } else {
5190       KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5191     }
5192     // Define type kmp_depend_info[<Dependences.size()>];
5193     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
5194         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
5195         ArrayType::Normal, /*IndexTypeQuals=*/0);
5196     // kmp_depend_info[<Dependences.size()>] deps;
5197     DependenciesArray =
5198         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
5199     for (unsigned I = 0; I < NumDependencies; ++I) {
5200       const Expr *E = Data.Dependences[I].second;
5201       LValue Addr = CGF.EmitLValue(E);
5202       llvm::Value *Size;
5203       QualType Ty = E->getType();
5204       if (const auto *ASE =
5205               dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
5206         LValue UpAddrLVal =
5207             CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
5208         llvm::Value *UpAddr =
5209             CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
5210         llvm::Value *LowIntPtr =
5211             CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
5212         llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
5213         Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
5214       } else {
5215         Size = CGF.getTypeSize(Ty);
5216       }
5217       LValue Base = CGF.MakeAddrLValue(
5218           CGF.Builder.CreateConstArrayGEP(DependenciesArray, I),
5219           KmpDependInfoTy);
5220       // deps[i].base_addr = &<Dependences[i].second>;
5221       LValue BaseAddrLVal = CGF.EmitLValueForField(
5222           Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
5223       CGF.EmitStoreOfScalar(
5224           CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
5225           BaseAddrLVal);
5226       // deps[i].len = sizeof(<Dependences[i].second>);
5227       LValue LenLVal = CGF.EmitLValueForField(
5228           Base, *std::next(KmpDependInfoRD->field_begin(), Len));
5229       CGF.EmitStoreOfScalar(Size, LenLVal);
5230       // deps[i].flags = <Dependences[i].first>;
5231       RTLDependenceKindTy DepKind;
5232       switch (Data.Dependences[I].first) {
5233       case OMPC_DEPEND_in:
5234         DepKind = DepIn;
5235         break;
5236       // Out and InOut dependencies must use the same code.
5237       case OMPC_DEPEND_out:
5238       case OMPC_DEPEND_inout:
5239         DepKind = DepInOut;
5240         break;
5241       case OMPC_DEPEND_mutexinoutset:
5242         DepKind = DepMutexInOutSet;
5243         break;
5244       case OMPC_DEPEND_source:
5245       case OMPC_DEPEND_sink:
5246       case OMPC_DEPEND_unknown:
5247         llvm_unreachable("Unknown task dependence type");
5248       }
5249       LValue FlagsLVal = CGF.EmitLValueForField(
5250           Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5251       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5252                             FlagsLVal);
5253     }
5254     DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5255         CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), CGF.VoidPtrTy);
5256   }
5257 
5258   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5259   // libcall.
5260   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5261   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5262   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
5263   // list is not empty
5264   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5265   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5266   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5267   llvm::Value *DepTaskArgs[7];
5268   if (NumDependencies) {
5269     DepTaskArgs[0] = UpLoc;
5270     DepTaskArgs[1] = ThreadID;
5271     DepTaskArgs[2] = NewTask;
5272     DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
5273     DepTaskArgs[4] = DependenciesArray.getPointer();
5274     DepTaskArgs[5] = CGF.Builder.getInt32(0);
5275     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5276   }
5277   auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
5278                         &TaskArgs,
5279                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5280     if (!Data.Tied) {
5281       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5282       LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5283       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5284     }
5285     if (NumDependencies) {
5286       CGF.EmitRuntimeCall(
5287           createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
5288     } else {
5289       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
5290                           TaskArgs);
5291     }
5292     // Check if parent region is untied and build return for untied task;
5293     if (auto *Region =
5294             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5295       Region->emitUntiedSwitch(CGF);
5296   };
5297 
5298   llvm::Value *DepWaitTaskArgs[6];
5299   if (NumDependencies) {
5300     DepWaitTaskArgs[0] = UpLoc;
5301     DepWaitTaskArgs[1] = ThreadID;
5302     DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
5303     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5304     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5305     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5306   }
5307   auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
5308                         NumDependencies, &DepWaitTaskArgs,
5309                         Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5310     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5311     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5312     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5313     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5314     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5315     // is specified.
5316     if (NumDependencies)
5317       CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
5318                           DepWaitTaskArgs);
5319     // Call proxy_task_entry(gtid, new_task);
5320     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
5321                       Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
5322       Action.Enter(CGF);
5323       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
5324       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
5325                                                           OutlinedFnArgs);
5326     };
5327 
5328     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
5329     // kmp_task_t *new_task);
5330     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
5331     // kmp_task_t *new_task);
5332     RegionCodeGenTy RCG(CodeGen);
5333     CommonActionTy Action(
5334         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
5335         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
5336     RCG.setAction(Action);
5337     RCG(CGF);
5338   };
5339 
5340   if (IfCond) {
5341     emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
5342   } else {
5343     RegionCodeGenTy ThenRCG(ThenCodeGen);
5344     ThenRCG(CGF);
5345   }
5346 }
5347 
/// Emits the call to the '__kmpc_taskloop' runtime entry point for the
/// taskloop directive \a D: allocates/initializes the task object via
/// emitTaskInit, fills in the task's lower-bound, upper-bound, stride and
/// reductions fields, and forwards the if/schedule clause values.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  // Allocate the kmp_task_t object and initialize its privates/shareds.
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // The 'if' clause value is passed to the runtime as an integer flag rather
  // than used to branch in generated code; an absent clause means 'true'.
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the task's lb/ub/st fields from the initializers of the loop
  // directive's bound/stride variables.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    // No taskloop reductions: zero out the field.
    CGF.EmitNullInitialization(RedLVal.getAddress(),
                               CGF.getContext().VoidPtrTy);
  }
  // Encoding of the runtime's 'sched' argument, chosen below based on whether
  // a grainsize or num_tasks clause was provided.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(),
      UBLVal.getPointer(),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
              CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      // grainsize/num_tasks value, or 0 when no schedule clause is present.
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      // Optional task duplication routine for lastprivate propagation.
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
}
5428 
5429 /// Emit reduction operation for each element of array (required for
5430 /// array sections) LHS op = RHS.
5431 /// \param Type Type of array.
5432 /// \param LHSVar Variable on the left side of the reduction operation
5433 /// (references element of array in original variable).
5434 /// \param RHSVar Variable on the right side of the reduction operation
5435 /// (references element of array in original variable).
5436 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5437 /// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  // The element count is taken from the LHS array only; LHS and RHS are
  // assumed to have the same number of elements.
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely when the array is empty.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes carry the current source/destination element pointers from one
  // iteration to the next.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Temporarily remap LHSVar/RHSVar to the current elements so the reduction
  // operation (expressed in terms of the variables) combines this pair.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5508 
5509 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5510 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5511 /// UDR combiner function.
5512 static void emitReductionCombiner(CodeGenFunction &CGF,
5513                                   const Expr *ReductionOp) {
5514   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5515     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5516       if (const auto *DRE =
5517               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5518         if (const auto *DRD =
5519                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5520           std::pair<llvm::Function *, llvm::Function *> Reduction =
5521               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5522           RValue Func = RValue::get(Reduction.first);
5523           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5524           CGF.EmitIgnoredExpr(ReductionOp);
5525           return;
5526         }
5527   CGF.EmitIgnoredExpr(ReductionOp);
5528 }
5529 
/// Emits the internal function
///   void reduction_func(void *LHSArg, void *RHSArg);
/// where both arguments point to arrays of void* (one slot per reduction
/// item, plus an extra slot after each variably-modified item holding its
/// dynamic element count). Each LHS element is combined with the matching
/// RHS element via the corresponding entry of \p ReductionOps.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Remap the LHS/RHS variables onto the corresponding array slots so that
  // the reduction operations (written in terms of those variables) combine
  // the slots' pointees.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // The next slot carries the dynamic size (stored as an inttoptr value
      // by the caller); map the VLA's size expression onto it.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5621 
5622 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5623                                                   const Expr *ReductionOp,
5624                                                   const Expr *PrivateRef,
5625                                                   const DeclRefExpr *LHS,
5626                                                   const DeclRefExpr *RHS) {
5627   if (PrivateRef->getType()->isArrayType()) {
5628     // Emit reduction for array section.
5629     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5630     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5631     EmitOMPAggregateReduction(
5632         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5633         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5634           emitReductionCombiner(CGF, ReductionOp);
5635         });
5636   } else {
5637     // Emit reduction for array subscript or single variable.
5638     emitReductionCombiner(CGF, ReductionOp);
5639   }
5640 }
5641 
/// Emits finalization code for the 'reduction' clause: either the combiners
/// inline (when Options.SimpleReduction is set) or the full
/// __kmpc_reduce{_nowait} protocol with a direct-combine case (1) and an
/// atomic/critical fallback case (2), as sketched in the comment below.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // Apply each combiner directly; no runtime calls are emitted.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      // The size travels through the void* slot as an inttoptr value; the
      // reduction function recovers it with ptrtoint.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
                                       : OMPRTL__kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  // The end-runtime-call is attached as the region's exit action.
  CommonActionTy Action(
      nullptr, llvm::None,
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
                                       : OMPRTL__kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // If the combiner has the form 'x = <rhs>', record both sides so the
      // update can be attempted as a simple atomic operation below.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // NOTE(review): fallback callback — presumably used when the
                // update can't be a single atomic RMW; binds VD to a temp
                // holding X's old value (XRValue) and re-evaluates UpExpr.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          createRuntimeFunction(OMPRTL__kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5945 
5946 /// Generates unique name for artificial threadprivate variables.
5947 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5948 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5949                                       const Expr *Ref) {
5950   SmallString<256> Buffer;
5951   llvm::raw_svector_ostream Out(Buffer);
5952   const clang::DeclRefExpr *DE;
5953   const VarDecl *D = ::getBaseDecl(Ref, DE);
5954   if (!D)
5955     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5956   D = D->getCanonicalDecl();
5957   std::string Name = CGM.getOpenMPRuntime().getName(
5958       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5959   Out << Prefix << Name << "_"
5960       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5961   return Out.str();
5962 }
5963 
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // The runtime calls this helper with a single 'void *' argument that points
  // to the private copy of the N-th reduction item.
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // Internal linkage: the helper is only referenced via the address stored in
  // the kmp_task_red_input_t record.
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Load the private-copy address out of the 'void *' parameter.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue SharedLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer)
  if (RCG.usesReductionInitializer(N)) {
    // The original item's address was stashed in an artificial threadprivate
    // variable by emitTaskReductionFixups; load it back here.
    Address SharedAddr =
        CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
            CGF, CGM.getContext().VoidPtrTy,
            generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // No custom initializer: the shared lvalue is unused, pass a null pointer.
    SharedLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
6030 
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  // LHS/RHS are the placeholder variables the reduction operation was built
  // against; they get remapped onto the function arguments below.
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  // The runtime passes two 'void *' arguments: in/out accumulator and input.
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
6108 
/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
/// Returns nullptr if the reduction item needs no cleanups at all.
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // Trivially-destructible items do not need a finalizer.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  // The runtime calls this helper with a 'void *' pointing to the private
  // copy to be destroyed.
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction();
  return Fn;
}
6157 
// Emits the call to __kmpc_task_reduction_init and returns the opaque
// taskgroup-reduction descriptor produced by the runtime. For every reduction
// item an element of a local kmp_task_red_input_t array is filled in with the
// shared address, item size and the init/fini/comb helper functions before the
// runtime call is made. Returns nullptr when there is nothing to emit.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_task_red_input {
  //   void *reduce_shar; // shared reduction item
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_task_red_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
                       Data.ReductionOps);
  // Fill in one array element per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer());
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs, array sections and
    // custom reduction initializations. It is required because runtime does not
    // provide the way to pass the sizes of VLAs/array sections to
    // initializer/combiner/finalizer functions and does not pass the pointer to
    // original reduction item to the initializer. Instead threadprivate global
    // variables are used to store these values and use them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    // ElemLVal.reduce_size = size-in-chars;
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
    // ElemLVal.reduce_fini = fini;
    // The finalizer may be null if the item needs no cleanups.
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    // Flag 1 tells the runtime the item uses delayed (lazy) creation.
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*IsSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
  }
  // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
}
6262 
// Stores per-item auxiliary data (non-constant size, original item address)
// into artificial threadprivate variables so the internally-generated
// init/comb/fini helpers, which only receive 'void *' arguments from the
// runtime, can recover it via generateUniqueName-keyed lookups.
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type size is non-constant
  // (Sizes.second != nullptr).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
  // Store address of the original reduction item if custom initializer is used.
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().VoidPtrTy,
        generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy),
        SharedAddr, /*IsVolatile=*/false);
  }
}
6289 
// Returns the address of the thread-local copy of a task-reduction item by
// calling __kmpc_task_reduction_get_th_data. \p ReductionsPtr is the opaque
// descriptor returned by __kmpc_task_reduction_init; \p SharedLVal identifies
// the original shared item. The result keeps the shared item's alignment.
Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      ReductionsPtr,
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(),
                                                      CGM.VoidPtrTy)};
  return Address(
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
      SharedLVal.getAlignment());
}
6307 
// Emits code for the 'taskwait' directive: a call to __kmpc_omp_taskwait plus,
// inside an OpenMP region, the untied-task resume switch.
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // Ignore return result until untied tasks are supported.
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
  // Allow untied tasks to be rescheduled after the taskwait point.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
6320 
// Emits the body of a directive that does not require outlining ('for',
// 'sections', 'atomic', ...) inline in the current function, wrapped in an
// inlined-region RAII so nested OpenMP codegen sees the proper region info.
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}
6330 
namespace {
/// Cancellation-kind values passed directly as the 'cncl_kind' argument of
/// __kmpc_cancel/__kmpc_cancellationpoint, so they must stay in sync with the
/// encoding the OpenMP runtime library expects.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace
6340 
6341 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6342   RTCancelKind CancelKind = CancelNoreq;
6343   if (CancelRegion == OMPD_parallel)
6344     CancelKind = CancelParallel;
6345   else if (CancelRegion == OMPD_for)
6346     CancelKind = CancelLoop;
6347   else if (CancelRegion == OMPD_sections)
6348     CancelKind = CancelSections;
6349   else {
6350     assert(CancelRegion == OMPD_taskgroup);
6351     CancelKind = CancelTaskgroup;
6352   }
6353   return CancelKind;
6354 }
6355 
// Emits code for the 'cancellation point' directive: calls
// __kmpc_cancellationpoint and, if it returns non-zero (cancellation was
// activated), branches out of the enclosing construct through cleanups.
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
6390 
// Emits code for the 'cancel' directive: calls __kmpc_cancel (guarded by the
// 'if' clause condition when present) and, if the runtime reports that
// cancellation was activated, branches out of the enclosing construct
// through cleanups.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // Capture by value: the lambda may run under emitOMPIfClause, after this
    // scope's locals would normally be consulted.
    auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
                                                        PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // 'if' clause present: only cancel when the condition is true.
      emitOMPIfClause(CGF, IfCond, ThenGen,
                      [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6432 
// Outlines the code of a 'target' region into a separate function and, for
// offload entries, registers it with the offloading infrastructure. Also
// records that this module emitted at least one target region.
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  HasEmittedTargetRegion = true;
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}
6442 
// Shared implementation of target-region outlining: generates the outlined
// function from the captured statement, creates the region ID constant and,
// for offload entries, registers the entry with OffloadEntriesInfoManager.
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Outline the region body into a function with the name computed above.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also mark
  // the outlined function to have external linkage in case we are emitting code
  // for the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
  } else {
    // Host side: a uniquely-named constant byte serves as the region ID.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}
6509 
6510 /// Checks if the expression is constant or does not have non-trivial function
6511 /// calls.
6512 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6513   // We can skip constant expressions.
6514   // We can skip expressions with trivial calls or simple expressions.
6515   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6516           !E->hasNonTrivialCall(Ctx)) &&
6517          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6518 }
6519 
// Peels compound statements and containers off \p Body, skipping statements
// that cannot affect codegen (trivial expressions, asm/null statements, a few
// OpenMP directives and harmless declarations), and returns the single
// remaining meaningful child statement, or nullptr if there is more than one.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        // A DeclStmt can be skipped only if every declaration in it is inert:
        // type/pragma/using/OpenMP declarations, constexpr variables, or
        // trivially-typed (or reference) variables with trivial initializers.
        if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->isConstexpr() ||
                     ((VD->getType().isTrivialType(Ctx) ||
                       VD->getType()->isReferenceType()) &&
                      (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    // Re-peel containers from the surviving child and keep unwrapping while it
    // is still a compound statement.
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6564 
6565 /// Emit the number of teams for a target directive.  Inspect the num_teams
6566 /// clause associated with a teams construct combined or closely nested
6567 /// with the target directive.
6568 ///
6569 /// Emit a team of size one for directives such as 'target parallel' that
6570 /// have no associated teams construct.
6571 ///
6572 /// Otherwise, return nullptr.
6573 static llvm::Value *
6574 emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
6575                                const OMPExecutableDirective &D) {
6576   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6577          "Clauses associated with the teams directive expected to be emitted "
6578          "only for the host!");
6579   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6580   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6581          "Expected target-based executable directive.");
6582   CGBuilderTy &Bld = CGF.Builder;
6583   switch (DirectiveKind) {
6584   case OMPD_target: {
6585     const auto *CS = D.getInnermostCapturedStmt();
6586     const auto *Body =
6587         CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6588     const Stmt *ChildStmt =
6589         CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6590     if (const auto *NestedDir =
6591             dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6592       if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6593         if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6594           CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6595           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6596           const Expr *NumTeams =
6597               NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6598           llvm::Value *NumTeamsVal =
6599               CGF.EmitScalarExpr(NumTeams,
6600                                  /*IgnoreResultAssign*/ true);
6601           return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6602                                    /*IsSigned=*/true);
6603         }
6604         return Bld.getInt32(0);
6605       }
6606       if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6607           isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
6608         return Bld.getInt32(1);
6609       return Bld.getInt32(0);
6610     }
6611     return nullptr;
6612   }
6613   case OMPD_target_teams:
6614   case OMPD_target_teams_distribute:
6615   case OMPD_target_teams_distribute_simd:
6616   case OMPD_target_teams_distribute_parallel_for:
6617   case OMPD_target_teams_distribute_parallel_for_simd: {
6618     if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6619       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6620       const Expr *NumTeams =
6621           D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6622       llvm::Value *NumTeamsVal =
6623           CGF.EmitScalarExpr(NumTeams,
6624                              /*IgnoreResultAssign*/ true);
6625       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6626                                /*IsSigned=*/true);
6627     }
6628     return Bld.getInt32(0);
6629   }
6630   case OMPD_target_parallel:
6631   case OMPD_target_parallel_for:
6632   case OMPD_target_parallel_for_simd:
6633   case OMPD_target_simd:
6634     return Bld.getInt32(1);
6635   case OMPD_parallel:
6636   case OMPD_for:
6637   case OMPD_parallel_for:
6638   case OMPD_parallel_sections:
6639   case OMPD_for_simd:
6640   case OMPD_parallel_for_simd:
6641   case OMPD_cancel:
6642   case OMPD_cancellation_point:
6643   case OMPD_ordered:
6644   case OMPD_threadprivate:
6645   case OMPD_allocate:
6646   case OMPD_task:
6647   case OMPD_simd:
6648   case OMPD_sections:
6649   case OMPD_section:
6650   case OMPD_single:
6651   case OMPD_master:
6652   case OMPD_critical:
6653   case OMPD_taskyield:
6654   case OMPD_barrier:
6655   case OMPD_taskwait:
6656   case OMPD_taskgroup:
6657   case OMPD_atomic:
6658   case OMPD_flush:
6659   case OMPD_teams:
6660   case OMPD_target_data:
6661   case OMPD_target_exit_data:
6662   case OMPD_target_enter_data:
6663   case OMPD_distribute:
6664   case OMPD_distribute_simd:
6665   case OMPD_distribute_parallel_for:
6666   case OMPD_distribute_parallel_for_simd:
6667   case OMPD_teams_distribute:
6668   case OMPD_teams_distribute_simd:
6669   case OMPD_teams_distribute_parallel_for:
6670   case OMPD_teams_distribute_parallel_for_simd:
6671   case OMPD_target_update:
6672   case OMPD_declare_simd:
6673   case OMPD_declare_target:
6674   case OMPD_end_declare_target:
6675   case OMPD_declare_reduction:
6676   case OMPD_declare_mapper:
6677   case OMPD_taskloop:
6678   case OMPD_taskloop_simd:
6679   case OMPD_requires:
6680   case OMPD_unknown:
6681     break;
6682   }
6683   llvm_unreachable("Unexpected directive kind.");
6684 }
6685 
/// Compute the number of threads for a target region whose innermost captured
/// statement is \a CS, by inspecting a closely nested 'parallel' (or 'simd')
/// directive.
/// \param DefaultThreadLimitVal Already-emitted thread_limit bound (may be
/// null); a num_threads value found here is clamped to it.
/// \return The i32 thread count, or null when no nested directive is found
/// and \a DefaultThreadLimitVal is null.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        // Only an 'if' clause with no name modifier, or with the 'parallel'
        // modifier, applies to the nested parallel region.
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // The condition folds to a constant. A false condition means the
            // parallel region runs with exactly one thread.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Emit any captured pre-init declarations the condition depends
            // on before evaluating it.
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  // Allocate storage (with cleanups) but skip the initializer.
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        // Emit captured pre-init declarations needed by the num_threads
        // expression.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*IsSigned=*/false);
        // Clamp to the enclosing bound: min(thread_limit, num_threads),
        // using an unsigned comparison.
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        // No num_threads clause: fall back to the thread_limit bound, or emit
        // 0 when no explicit bound is known.
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    // A nested simd region executes with a single thread.
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}
6777 
6778 /// Emit the number of threads for a target directive.  Inspect the
6779 /// thread_limit clause associated with a teams construct combined or closely
6780 /// nested with the target directive.
6781 ///
6782 /// Emit the num_threads clause for directives such as 'target parallel' that
6783 /// have no associated teams construct.
6784 ///
6785 /// Otherwise, return nullptr.
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    // A closely nested parallel (or simd) region determines the thread count
    // directly.
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // Pick up a thread_limit clause from the nested directive, if any.
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        // Emit captured pre-init declarations the clause expression needs.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              // Allocate storage (with cleanups) but skip the initializer.
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false);
      }
      // For a nested non-distribute teams directive, descend one more level
      // looking for the parallel region.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      // A non-simd distribute directive: look inside it for a parallel region.
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      // A simd region executes with a single thread.
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    // No nested region found: use the thread_limit bound, or 0 when no
    // explicit bound is known.
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // Look through a nested plain 'distribute' for the parallel region.
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      // Only an 'if' clause with no name modifier, or with the 'parallel'
      // modifier, applies here.
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Constant-false condition: single thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/false);
      // Result is min(num_threads, thread_limit), via unsigned comparison.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  // None of the remaining directives is a target-based executable directive,
  // so reaching this switch with one of them is a bug in the caller.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
6990 
6991 namespace {
6992 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
6993 
6994 // Utility to handle information from clauses associated with a given
6995 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6996 // It provides a convenient interface to obtain the information and generate
6997 // code for that information.
6998 class MappableExprsHandler {
6999 public:
7000   /// Values for bit flags used to specify the mapping type for
7001   /// offloading.
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  /// NOTE(review): these values presumably mirror the offloading runtime's
  /// (libomptarget) map-type flags — keep the two in sync when changing them.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7037 
  /// Class that associates information with a base pointer to be passed to the
  /// runtime library.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    /// Dereferencing yields the stored base pointer value.
    llvm::Value *operator*() const { return Ptr; }
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };
7054 
  /// Parallel arrays of base pointers, section pointers and map-type flags —
  /// one entry per generated map.
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;

  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Address of the whole struct being partially mapped.
    Address Base = Address::invalid();
  };
7070 
7071 private:
  /// Information gathered for one component list of a map-like clause: the
  /// components themselves, the map type and modifiers, whether the runtime
  /// must return a device pointer for this entry, and whether the map was
  /// implicit. (Replaces a stale comment that described device-pointer return
  /// kinds.)
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
  };
7089 
  /// If use_device_ptr is used on a pointer which is a struct member and there
  /// is no map information about it, then emission of that entry is deferred
  /// until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    /// Expression identifying the deferred device-pointer component.
    const Expr *IE = nullptr;
    /// Declaration named by the use_device_ptr clause.
    const ValueDecl *VD = nullptr;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
        : IE(IE), VD(VD) {}
  };
7100 
  /// Directive from where the map clauses were extracted.
  const OMPExecutableDirective &CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all firstprivate variables in the current directive.
  llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;
7116 
  /// Return the size in bytes, as an llvm::Value, of the object designated by
  /// \a E. Array sections are measured by their section length rather than by
  /// the type of the expression; references are measured by their pointee.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression, that means we
      // are using the whole length of the base.
      if (!OAE->getLength() && OAE->getColonLoc().isValid())
        return CGF.getTypeSize(BaseTy);

      // Element size comes from the pointee (for pointer bases) or the
      // element type (for array bases).
      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength())
        return ElemSize;

      // Size = length * sizeof(element), with no-unsigned-wrap multiply.
      llvm::Value *LengthVal = CGF.EmitScalarExpr(OAE->getLength());
      LengthVal =
          CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false);
      return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
    }
    return CGF.getTypeSize(ExprTy);
  }
7158 
7159   /// Return the corresponding bits for a given map clause modifier. Add
7160   /// a flag marking the map as a pointer if requested. Add a flag marking the
7161   /// map as the first one of a series of maps that relate to the same map
7162   /// expression.
7163   OpenMPOffloadMappingFlags getMapTypeBits(
7164       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7165       bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
7166     OpenMPOffloadMappingFlags Bits =
7167         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7168     switch (MapType) {
7169     case OMPC_MAP_alloc:
7170     case OMPC_MAP_release:
7171       // alloc and release is the default behavior in the runtime library,  i.e.
7172       // if we don't pass any bits alloc/release that is what the runtime is
7173       // going to do. Therefore, we don't need to signal anything for these two
7174       // type modifiers.
7175       break;
7176     case OMPC_MAP_to:
7177       Bits |= OMP_MAP_TO;
7178       break;
7179     case OMPC_MAP_from:
7180       Bits |= OMP_MAP_FROM;
7181       break;
7182     case OMPC_MAP_tofrom:
7183       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7184       break;
7185     case OMPC_MAP_delete:
7186       Bits |= OMP_MAP_DELETE;
7187       break;
7188     case OMPC_MAP_unknown:
7189       llvm_unreachable("Unexpected map type!");
7190     }
7191     if (AddPtrFlag)
7192       Bits |= OMP_MAP_PTR_AND_OBJ;
7193     if (AddIsTargetParamFlag)
7194       Bits |= OMP_MAP_TARGET_PARAM;
7195     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7196         != MapModifiers.end())
7197       Bits |= OMP_MAP_ALWAYS;
7198     return Bits;
7199   }
7200 
7201   /// Return true if the provided expression is a final array section. A
7202   /// final array section, is one whose length can't be proved to be one.
7203   bool isFinalArraySectionExpression(const Expr *E) const {
7204     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7205 
7206     // It is not an array section and therefore not a unity-size one.
7207     if (!OASE)
7208       return false;
7209 
7210     // An array section with no colon always refer to a single element.
7211     if (OASE->getColonLoc().isInvalid())
7212       return false;
7213 
7214     const Expr *Length = OASE->getLength();
7215 
7216     // If we don't have a length we have to check if the array has size 1
7217     // for this dimension. Also, we should always expect a length if the
7218     // base type is pointer.
7219     if (!Length) {
7220       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7221                              OASE->getBase()->IgnoreParenImpCasts())
7222                              .getCanonicalType();
7223       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7224         return ATy->getSize().getSExtValue() != 1;
7225       // If we don't have a constant dimension length, we have to consider
7226       // the current section as having any size, so it is not necessarily
7227       // unitary. If it happen to be unity size, that's user fault.
7228       return true;
7229     }
7230 
7231     // Check if the length evaluates to 1.
7232     Expr::EvalResult Result;
7233     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7234       return true; // Can have more that size 1.
7235 
7236     llvm::APSInt ConstLength = Result.Val.getInt();
7237     return ConstLength.getSExtValue() != 1;
7238   }
7239 
7240   /// Generate the base pointers, section pointers, sizes and map type
7241   /// bits for the provided map type, map modifier, and expression components.
7242   /// \a IsFirstComponent should be set to true if the provided set of
7243   /// components is the first associated with a capture.
7244   void generateInfoForComponentList(
7245       OpenMPMapClauseKind MapType,
7246       ArrayRef<OpenMPMapModifierKind> MapModifiers,
7247       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7248       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
7249       MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
7250       StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
7251       bool IsImplicit,
7252       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7253           OverlappedElements = llvm::None) const {
7254     // The following summarizes what has to be generated for each map and the
7255     // types below. The generated information is expressed in this order:
7256     // base pointer, section pointer, size, flags
7257     // (to add to the ones that come from the map type and modifier).
7258     //
7259     // double d;
7260     // int i[100];
7261     // float *p;
7262     //
7263     // struct S1 {
7264     //   int i;
7265     //   float f[50];
7266     // }
7267     // struct S2 {
7268     //   int i;
7269     //   float f[50];
7270     //   S1 s;
7271     //   double *p;
7272     //   struct S2 *ps;
7273     // }
7274     // S2 s;
7275     // S2 *ps;
7276     //
7277     // map(d)
7278     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7279     //
7280     // map(i)
7281     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7282     //
7283     // map(i[1:23])
7284     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7285     //
7286     // map(p)
7287     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7288     //
7289     // map(p[1:24])
7290     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7291     //
7292     // map(s)
7293     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7294     //
7295     // map(s.i)
7296     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7297     //
7298     // map(s.s.f)
7299     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7300     //
7301     // map(s.p)
7302     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7303     //
7304     // map(to: s.p[:22])
7305     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7306     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7307     // &(s.p), &(s.p[0]), 22*sizeof(double),
7308     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7309     // (*) alloc space for struct members, only this is a target parameter
7310     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7311     //      optimizes this entry out, same in the examples below)
7312     // (***) map the pointee (map: to)
7313     //
7314     // map(s.ps)
7315     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7316     //
7317     // map(from: s.ps->s.i)
7318     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7319     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7320     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7321     //
7322     // map(to: s.ps->ps)
7323     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7324     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7325     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7326     //
7327     // map(s.ps->ps->ps)
7328     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7329     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7330     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7331     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7332     //
7333     // map(to: s.ps->ps->s.f[:22])
7334     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7335     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7336     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7337     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7338     //
7339     // map(ps)
7340     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7341     //
7342     // map(ps->i)
7343     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7344     //
7345     // map(ps->s.f)
7346     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7347     //
7348     // map(from: ps->p)
7349     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7350     //
7351     // map(to: ps->p[:22])
7352     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7353     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7354     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7355     //
7356     // map(ps->ps)
7357     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7358     //
7359     // map(from: ps->ps->s.i)
7360     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7361     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7362     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7363     //
7364     // map(from: ps->ps->ps)
7365     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7366     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7367     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7368     //
7369     // map(ps->ps->ps->ps)
7370     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7371     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7372     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7373     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7374     //
7375     // map(to: ps->ps->ps->s.f[:22])
7376     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7377     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7378     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7379     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7380     //
7381     // map(to: s.f[:22]) map(from: s.p[:33])
7382     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7383     //     sizeof(double*) (**), TARGET_PARAM
7384     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7385     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7386     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (**) allocate contiguous space needed to fit all mapped members even if
7388     //     we allocate space for members not mapped (in this example,
7389     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7390     //     them as well because they fall between &s.f[0] and &s.p)
7391     //
7392     // map(from: s.f[:22]) map(to: ps->p[:33])
7393     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7394     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7395     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7396     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7397     // (*) the struct this entry pertains to is the 2nd element in the list of
7398     //     arguments, hence MEMBER_OF(2)
7399     //
7400     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7401     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7402     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7403     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7404     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7405     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7406     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7407     // (*) the struct this entry pertains to is the 4th element in the list
7408     //     of arguments, hence MEMBER_OF(4)
7409 
7410     // Track if the map information being generated is the first for a capture.
7411     bool IsCaptureFirstInfo = IsFirstComponentList;
7412     bool IsLink = false; // Is this variable a "declare target link"?
7413 
7414     // Scan the components from the base to the complete expression.
7415     auto CI = Components.rbegin();
7416     auto CE = Components.rend();
7417     auto I = CI;
7418 
7419     // Track if the map information being generated is the first for a list of
7420     // components.
7421     bool IsExpressionFirstInfo = true;
7422     Address BP = Address::invalid();
7423     const Expr *AssocExpr = I->getAssociatedExpression();
7424     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7425     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7426 
7427     if (isa<MemberExpr>(AssocExpr)) {
7428       // The base is the 'this' pointer. The content of the pointer is going
7429       // to be the base of the field being mapped.
7430       BP = CGF.LoadCXXThisAddress();
7431     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7432                (OASE &&
7433                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7434       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7435     } else {
7436       // The base is the reference to the variable.
7437       // BP = &Var.
7438       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7439       if (const auto *VD =
7440               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7441         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7442                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD))
7443           if (*Res == OMPDeclareTargetDeclAttr::MT_Link) {
7444             IsLink = true;
7445             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD);
7446           }
7447       }
7448 
7449       // If the variable is a pointer and is being dereferenced (i.e. is not
7450       // the last component), the base has to be the pointer itself, not its
7451       // reference. References are ignored for mapping purposes.
7452       QualType Ty =
7453           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7454       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7455         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7456 
7457         // We do not need to generate individual map information for the
7458         // pointer, it can be associated with the combined storage.
7459         ++I;
7460       }
7461     }
7462 
7463     // Track whether a component of the list should be marked as MEMBER_OF some
7464     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7465     // in a component list should be marked as MEMBER_OF, all subsequent entries
7466     // do not belong to the base struct. E.g.
7467     // struct S2 s;
7468     // s.ps->ps->ps->f[:]
7469     //   (1) (2) (3) (4)
7470     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7471     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7472     // is the pointee of ps(2) which is not member of struct s, so it should not
7473     // be marked as such (it is still PTR_AND_OBJ).
7474     // The variable is initialized to false so that PTR_AND_OBJ entries which
7475     // are not struct members are not considered (e.g. array of pointers to
7476     // data).
7477     bool ShouldBeMemberOf = false;
7478 
7479     // Variable keeping track of whether or not we have encountered a component
7480     // in the component list which is a member expression. Useful when we have a
7481     // pointer or a final array section, in which case it is the previous
7482     // component in the list which tells us whether we have a member expression.
7483     // E.g. X.f[:]
7484     // While processing the final array section "[:]" it is "f" which tells us
7485     // whether we are dealing with a member of a declared struct.
7486     const MemberExpr *EncounteredME = nullptr;
7487 
7488     for (; I != CE; ++I) {
7489       // If the current component is member of a struct (parent struct) mark it.
7490       if (!EncounteredME) {
7491         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7492         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7493         // as MEMBER_OF the parent struct.
7494         if (EncounteredME)
7495           ShouldBeMemberOf = true;
7496       }
7497 
7498       auto Next = std::next(I);
7499 
7500       // We need to generate the addresses and sizes if this is the last
7501       // component, if the component is a pointer or if it is an array section
7502       // whose length can't be proved to be one. If this is a pointer, it
7503       // becomes the base address for the following components.
7504 
7505       // A final array section, is one whose length can't be proved to be one.
7506       bool IsFinalArraySection =
7507           isFinalArraySectionExpression(I->getAssociatedExpression());
7508 
7509       // Get information on whether the element is a pointer. Have to do a
7510       // special treatment for array sections given that they are built-in
7511       // types.
7512       const auto *OASE =
7513           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7514       bool IsPointer =
7515           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7516                        .getCanonicalType()
7517                        ->isAnyPointerType()) ||
7518           I->getAssociatedExpression()->getType()->isAnyPointerType();
7519 
7520       if (Next == CE || IsPointer || IsFinalArraySection) {
7521         // If this is not the last component, we expect the pointer to be
7522         // associated with an array expression or member expression.
7523         assert((Next == CE ||
7524                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7525                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7526                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
7527                "Unexpected expression");
7528 
7529         Address LB =
7530             CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress();
7531 
7532         // If this component is a pointer inside the base struct then we don't
7533         // need to create any entry for it - it will be combined with the object
7534         // it is pointing to into a single PTR_AND_OBJ entry.
7535         bool IsMemberPointer =
7536             IsPointer && EncounteredME &&
7537             (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
7538              EncounteredME);
7539         if (!OverlappedElements.empty()) {
7540           // Handle base element with the info for overlapped elements.
7541           assert(!PartialStruct.Base.isValid() && "The base element is set.");
7542           assert(Next == CE &&
7543                  "Expected last element for the overlapped elements.");
7544           assert(!IsPointer &&
7545                  "Unexpected base element with the pointer type.");
7546           // Mark the whole struct as the struct that requires allocation on the
7547           // device.
7548           PartialStruct.LowestElem = {0, LB};
7549           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7550               I->getAssociatedExpression()->getType());
7551           Address HB = CGF.Builder.CreateConstGEP(
7552               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
7553                                                               CGF.VoidPtrTy),
7554               TypeSize.getQuantity() - 1);
7555           PartialStruct.HighestElem = {
7556               std::numeric_limits<decltype(
7557                   PartialStruct.HighestElem.first)>::max(),
7558               HB};
7559           PartialStruct.Base = BP;
7560           // Emit data for non-overlapped data.
7561           OpenMPOffloadMappingFlags Flags =
7562               OMP_MAP_MEMBER_OF |
7563               getMapTypeBits(MapType, MapModifiers, IsImplicit,
7564                              /*AddPtrFlag=*/false,
7565                              /*AddIsTargetParamFlag=*/false);
7566           LB = BP;
7567           llvm::Value *Size = nullptr;
7568           // Do bitcopy of all non-overlapped structure elements.
7569           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7570                    Component : OverlappedElements) {
7571             Address ComponentLB = Address::invalid();
7572             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7573                  Component) {
7574               if (MC.getAssociatedDeclaration()) {
7575                 ComponentLB =
7576                     CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7577                         .getAddress();
7578                 Size = CGF.Builder.CreatePtrDiff(
7579                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
7580                     CGF.EmitCastToVoidPtr(LB.getPointer()));
7581                 break;
7582               }
7583             }
7584             BasePointers.push_back(BP.getPointer());
7585             Pointers.push_back(LB.getPointer());
7586             Sizes.push_back(Size);
7587             Types.push_back(Flags);
7588             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7589           }
7590           BasePointers.push_back(BP.getPointer());
7591           Pointers.push_back(LB.getPointer());
7592           Size = CGF.Builder.CreatePtrDiff(
7593               CGF.EmitCastToVoidPtr(
7594                   CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
7595               CGF.EmitCastToVoidPtr(LB.getPointer()));
7596           Sizes.push_back(Size);
7597           Types.push_back(Flags);
7598           break;
7599         }
7600         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7601         if (!IsMemberPointer) {
7602           BasePointers.push_back(BP.getPointer());
7603           Pointers.push_back(LB.getPointer());
7604           Sizes.push_back(Size);
7605 
7606           // We need to add a pointer flag for each map that comes from the
7607           // same expression except for the first one. We also need to signal
7608           // this map is the first one that relates with the current capture
7609           // (there is a set of entries for each capture).
7610           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7611               MapType, MapModifiers, IsImplicit,
7612               !IsExpressionFirstInfo || IsLink, IsCaptureFirstInfo && !IsLink);
7613 
7614           if (!IsExpressionFirstInfo) {
7615             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7616             // then we reset the TO/FROM/ALWAYS/DELETE flags.
7617             if (IsPointer)
7618               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
7619                          OMP_MAP_DELETE);
7620 
7621             if (ShouldBeMemberOf) {
7622               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7623               // should be later updated with the correct value of MEMBER_OF.
7624               Flags |= OMP_MAP_MEMBER_OF;
7625               // From now on, all subsequent PTR_AND_OBJ entries should not be
7626               // marked as MEMBER_OF.
7627               ShouldBeMemberOf = false;
7628             }
7629           }
7630 
7631           Types.push_back(Flags);
7632         }
7633 
7634         // If we have encountered a member expression so far, keep track of the
7635         // mapped member. If the parent is "*this", then the value declaration
7636         // is nullptr.
7637         if (EncounteredME) {
7638           const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl());
7639           unsigned FieldIndex = FD->getFieldIndex();
7640 
7641           // Update info about the lowest and highest elements for this struct
7642           if (!PartialStruct.Base.isValid()) {
7643             PartialStruct.LowestElem = {FieldIndex, LB};
7644             PartialStruct.HighestElem = {FieldIndex, LB};
7645             PartialStruct.Base = BP;
7646           } else if (FieldIndex < PartialStruct.LowestElem.first) {
7647             PartialStruct.LowestElem = {FieldIndex, LB};
7648           } else if (FieldIndex > PartialStruct.HighestElem.first) {
7649             PartialStruct.HighestElem = {FieldIndex, LB};
7650           }
7651         }
7652 
7653         // If we have a final array section, we are done with this expression.
7654         if (IsFinalArraySection)
7655           break;
7656 
7657         // The pointer becomes the base for the next element.
7658         if (Next != CE)
7659           BP = LB;
7660 
7661         IsExpressionFirstInfo = false;
7662         IsCaptureFirstInfo = false;
7663       }
7664     }
7665   }
7666 
7667   /// Return the adjusted map modifiers if the declaration a capture refers to
7668   /// appears in a first-private clause. This is expected to be used only with
7669   /// directives that start with 'target'.
7670   MappableExprsHandler::OpenMPOffloadMappingFlags
7671   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7672     assert(Cap.capturesVariable() && "Expected capture by reference only!");
7673 
7674     // A first private variable captured by reference will use only the
7675     // 'private ptr' and 'map to' flag. Return the right flags if the captured
7676     // declaration is known as first-private in this handler.
7677     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7678       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
7679           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
7680         return MappableExprsHandler::OMP_MAP_ALWAYS |
7681                MappableExprsHandler::OMP_MAP_TO;
7682       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7683         return MappableExprsHandler::OMP_MAP_TO |
7684                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
7685       return MappableExprsHandler::OMP_MAP_PRIVATE |
7686              MappableExprsHandler::OMP_MAP_TO;
7687     }
7688     return MappableExprsHandler::OMP_MAP_TO |
7689            MappableExprsHandler::OMP_MAP_FROM;
7690   }
7691 
7692   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
7693     // Member of is given by the 16 MSB of the flag, so rotate by 48 bits.
7694     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
7695                                                   << 48);
7696   }
7697 
7698   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
7699                                      OpenMPOffloadMappingFlags MemberOfFlag) {
7700     // If the entry is PTR_AND_OBJ but has not been marked with the special
7701     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
7702     // marked as MEMBER_OF.
7703     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
7704         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
7705       return;
7706 
7707     // Reset the placeholder value to prepare the flag for the assignment of the
7708     // proper MEMBER_OF value.
7709     Flags &= ~OMP_MAP_MEMBER_OF;
7710     Flags |= MemberOfFlag;
7711   }
7712 
  /// Append to \a Layout the FieldDecls of \a RD and of all its (non-empty)
  /// bases, flattened and ordered by their position in the LLVM struct layout.
  /// \a AsBase selects the base-subobject LLVM type for \a RD instead of the
  /// complete-object type. Bitfields are skipped.
  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    unsigned NumElements = St->getNumElements();
    // One slot per LLVM struct element; each slot holds either a base class
    // or a field, filled in by the loops below.
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(Base)
                                 .getNonVirtualSize()
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      // Keep the first occupant if a slot was already claimed above.
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField()) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    // Walk the slots in LLVM layout order, recursing into bases (as base
    // subobjects) and appending fields directly.
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }
7772 
7773 public:
7774   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
7775       : CurDir(Dir), CGF(CGF) {
7776     // Extract firstprivate clause information.
7777     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
7778       for (const auto *D : C->varlists())
7779         FirstPrivateDecls.insert(
7780             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
7781     // Extract device pointer clause information.
7782     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
7783       for (auto L : C->component_lists())
7784         DevPointersMap[L.first].push_back(L.second);
7785   }
7786 
  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
                         MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                         MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct) const {
    // Base is the base of the struct
    BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element
    llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
    Pointers.push_back(LB);
    // Size is (addr of {highest+1} element) - (addr of lowest element)
    llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
    llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
    // Pointer difference is signed; widen/narrow it to size_t as unsigned
    // since the computed extent can never be negative.
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.SizeTy,
                                                  /*isSigned=*/false);
    Sizes.push_back(Size);
    // Map type is always TARGET_PARAM
    Types.push_back(OMP_MAP_TARGET_PARAM);
    // Remove TARGET_PARAM flag from the first element
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }
7821 
7822   /// Generate all the base pointers, section pointers, sizes and map
7823   /// types for the extracted mappable expressions. Also, for each item that
7824   /// relates with a device pointer, a pair of the relevant declaration and
7825   /// index where it occurs is appended to the device pointers info array.
7826   void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
7827                        MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7828                        MapFlagsArrayTy &Types) const {
7829     // We have to process the component lists that relate with the same
7830     // declaration in a single chunk so that we can generate the map flags
7831     // correctly. Therefore, we organize all lists in a map.
7832     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
7833 
7834     // Helper function to fill the information map for the different supported
7835     // clauses.
7836     auto &&InfoGen = [&Info](
7837         const ValueDecl *D,
7838         OMPClauseMappableExprCommon::MappableExprComponentListRef L,
7839         OpenMPMapClauseKind MapType,
7840         ArrayRef<OpenMPMapModifierKind> MapModifiers,
7841         bool ReturnDevicePointer, bool IsImplicit) {
7842       const ValueDecl *VD =
7843           D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
7844       Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
7845                             IsImplicit);
7846     };
7847 
7848     // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
7849     for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
7850       for (const auto &L : C->component_lists()) {
7851         InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
7852             /*ReturnDevicePointer=*/false, C->isImplicit());
7853       }
7854     for (const auto *C : this->CurDir.getClausesOfKind<OMPToClause>())
7855       for (const auto &L : C->component_lists()) {
7856         InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
7857             /*ReturnDevicePointer=*/false, C->isImplicit());
7858       }
7859     for (const auto *C : this->CurDir.getClausesOfKind<OMPFromClause>())
7860       for (const auto &L : C->component_lists()) {
7861         InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
7862             /*ReturnDevicePointer=*/false, C->isImplicit());
7863       }
7864 
7865     // Look at the use_device_ptr clause information and mark the existing map
7866     // entries as such. If there is no map information for an entry in the
7867     // use_device_ptr list, we create one with map type 'alloc' and zero size
7868     // section. It is the user fault if that was not mapped before. If there is
7869     // no map information and the pointer is a struct member, then we defer the
7870     // emission of that entry until the whole struct has been processed.
7871     llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
7872         DeferredInfo;
7873 
7874     // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
7875     for (const auto *C :
7876         this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>()) {
7877       for (const auto &L : C->component_lists()) {
7878         assert(!L.second.empty() && "Not expecting empty list of components!");
7879         const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
7880         VD = cast<ValueDecl>(VD->getCanonicalDecl());
7881         const Expr *IE = L.second.back().getAssociatedExpression();
7882         // If the first component is a member expression, we have to look into
7883         // 'this', which maps to null in the map of map information. Otherwise
7884         // look directly for the information.
7885         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
7886 
7887         // We potentially have map information for this declaration already.
7888         // Look for the first set of components that refer to it.
7889         if (It != Info.end()) {
7890           auto CI = std::find_if(
7891               It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
7892                 return MI.Components.back().getAssociatedDeclaration() == VD;
7893               });
7894           // If we found a map entry, signal that the pointer has to be returned
7895           // and move on to the next declaration.
7896           if (CI != It->second.end()) {
7897             CI->ReturnDevicePointer = true;
7898             continue;
7899           }
7900         }
7901 
7902         // We didn't find any match in our map information - generate a zero
7903         // size array section - if the pointer is a struct member we defer this
7904         // action until the whole struct has been processed.
7905         // FIXME: MSVC 2013 seems to require this-> to find member CGF.
7906         if (isa<MemberExpr>(IE)) {
7907           // Insert the pointer into Info to be processed by
7908           // generateInfoForComponentList. Because it is a member pointer
7909           // without a pointee, no entry will be generated for it, therefore
7910           // we need to generate one after the whole struct has been processed.
7911           // Nonetheless, generateInfoForComponentList must be called to take
7912           // the pointer into account for the calculation of the range of the
7913           // partial struct.
7914           InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
7915                   /*ReturnDevicePointer=*/false, C->isImplicit());
7916           DeferredInfo[nullptr].emplace_back(IE, VD);
7917         } else {
7918           llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
7919               this->CGF.EmitLValue(IE), IE->getExprLoc());
7920           BasePointers.emplace_back(Ptr, VD);
7921           Pointers.push_back(Ptr);
7922           Sizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy));
7923           Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
7924         }
7925       }
7926     }
7927 
7928     for (const auto &M : Info) {
7929       // We need to know when we generate information for the first component
7930       // associated with a capture, because the mapping flags depend on it.
7931       bool IsFirstComponentList = true;
7932 
7933       // Temporary versions of arrays
7934       MapBaseValuesArrayTy CurBasePointers;
7935       MapValuesArrayTy CurPointers;
7936       MapValuesArrayTy CurSizes;
7937       MapFlagsArrayTy CurTypes;
7938       StructRangeInfoTy PartialStruct;
7939 
7940       for (const MapInfo &L : M.second) {
7941         assert(!L.Components.empty() &&
7942                "Not expecting declaration with no component lists.");
7943 
7944         // Remember the current base pointer index.
7945         unsigned CurrentBasePointersIdx = CurBasePointers.size();
7946         // FIXME: MSVC 2013 seems to require this-> to find the member method.
7947         this->generateInfoForComponentList(
7948             L.MapType, L.MapModifiers, L.Components, CurBasePointers,
7949             CurPointers, CurSizes, CurTypes, PartialStruct,
7950             IsFirstComponentList, L.IsImplicit);
7951 
7952         // If this entry relates with a device pointer, set the relevant
7953         // declaration and add the 'return pointer' flag.
7954         if (L.ReturnDevicePointer) {
7955           assert(CurBasePointers.size() > CurrentBasePointersIdx &&
7956                  "Unexpected number of mapped base pointers.");
7957 
7958           const ValueDecl *RelevantVD =
7959               L.Components.back().getAssociatedDeclaration();
7960           assert(RelevantVD &&
7961                  "No relevant declaration related with device pointer??");
7962 
7963           CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
7964           CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
7965         }
7966         IsFirstComponentList = false;
7967       }
7968 
7969       // Append any pending zero-length pointers which are struct members and
7970       // used with use_device_ptr.
7971       auto CI = DeferredInfo.find(M.first);
7972       if (CI != DeferredInfo.end()) {
7973         for (const DeferredDevicePtrEntryTy &L : CI->second) {
7974           llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer();
7975           llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
7976               this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
7977           CurBasePointers.emplace_back(BasePtr, L.VD);
7978           CurPointers.push_back(Ptr);
7979           CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy));
7980           // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
7981           // value MEMBER_OF=FFFF so that the entry is later updated with the
7982           // correct value of MEMBER_OF.
7983           CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
7984                              OMP_MAP_MEMBER_OF);
7985         }
7986       }
7987 
7988       // If there is an entry in PartialStruct it means we have a struct with
7989       // individual members mapped. Emit an extra combined entry.
7990       if (PartialStruct.Base.isValid())
7991         emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
7992                           PartialStruct);
7993 
7994       // We need to append the results of this capture to what we already have.
7995       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
7996       Pointers.append(CurPointers.begin(), CurPointers.end());
7997       Sizes.append(CurSizes.begin(), CurSizes.end());
7998       Types.append(CurTypes.begin(), CurTypes.end());
7999     }
8000   }
8001 
8002   /// Emit capture info for lambdas for variables captured by reference.
8003   void generateInfoForLambdaCaptures(
8004       const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
8005       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
8006       MapFlagsArrayTy &Types,
8007       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8008     const auto *RD = VD->getType()
8009                          .getCanonicalType()
8010                          .getNonReferenceType()
8011                          ->getAsCXXRecordDecl();
8012     if (!RD || !RD->isLambda())
8013       return;
8014     Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
8015     LValue VDLVal = CGF.MakeAddrLValue(
8016         VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
8017     llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
8018     FieldDecl *ThisCapture = nullptr;
8019     RD->getCaptureFields(Captures, ThisCapture);
8020     if (ThisCapture) {
8021       LValue ThisLVal =
8022           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8023       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8024       LambdaPointers.try_emplace(ThisLVal.getPointer(), VDLVal.getPointer());
8025       BasePointers.push_back(ThisLVal.getPointer());
8026       Pointers.push_back(ThisLValVal.getPointer());
8027       Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
8028       Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8029                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8030     }
8031     for (const LambdaCapture &LC : RD->captures()) {
8032       if (LC.getCaptureKind() != LCK_ByRef)
8033         continue;
8034       const VarDecl *VD = LC.getCapturedVar();
8035       auto It = Captures.find(VD);
8036       assert(It != Captures.end() && "Found lambda capture without field.");
8037       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8038       LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8039       LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer());
8040       BasePointers.push_back(VarLVal.getPointer());
8041       Pointers.push_back(VarLValVal.getPointer());
8042       Sizes.push_back(CGF.getTypeSize(
8043           VD->getType().getCanonicalType().getNonReferenceType()));
8044       Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8045                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8046     }
8047   }
8048 
8049   /// Set correct indices for lambdas captures.
8050   void adjustMemberOfForLambdaCaptures(
8051       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8052       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8053       MapFlagsArrayTy &Types) const {
8054     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8055       // Set correct member_of idx for all implicit lambda captures.
8056       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8057                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8058         continue;
8059       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8060       assert(BasePtr && "Unable to find base lambda address.");
8061       int TgtIdx = -1;
8062       for (unsigned J = I; J > 0; --J) {
8063         unsigned Idx = J - 1;
8064         if (Pointers[Idx] != BasePtr)
8065           continue;
8066         TgtIdx = Idx;
8067         break;
8068       }
8069       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8070       // All other current entries will be MEMBER_OF the combined entry
8071       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8072       // 0xFFFF in the MEMBER_OF field).
8073       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8074       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8075     }
8076   }
8077 
  /// Generate the base pointers, section pointers, sizes and map types
  /// associated to a given capture.
  ///
  /// Results are appended to \a BasePointers, \a Pointers, \a Sizes and
  /// \a Types; \a PartialStruct collects range information when individual
  /// members of a struct are mapped separately so a combined entry can be
  /// emitted afterwards by the caller.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg,
                              MapBaseValuesArrayTy &BasePointers,
                              MapValuesArrayTy &Pointers,
                              MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // The declaration this capture refers to (null for 'this' captures).
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      BasePointers.emplace_back(Arg, VD);
      Pointers.push_back(Arg);
      Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
      Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
      return;
    }

    // Collect every component list from the map clauses that refers to VD,
    // together with its map type, modifiers and implicit flag.
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
    SmallVector<MapData, 4> DeclComponentLists;
    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) {
      for (const auto &L : C->decl_component_lists(VD)) {
        assert(L.first == VD &&
               "We got information for the wrong declaration??");
        assert(!L.second.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(L.second, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit());
      }
    }

    // Find overlapping elements (including the offset from the base element).
    // For each "base" list we record all other lists that share a common
    // prefix with it, i.e. that map a sub-object of the same expression.
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ++Count;
      // Only compare against the lists that follow L, so each pair is
      // examined once.
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        // Walk both lists from the base expression outwards while they agree
        // on expression class and associated declaration.
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head of
        // the components list.
        if (CI == CE || SI == SE) {
          assert((CI != CE || SI != SE) &&
                 "Unexpected full match of the mapping components.");
          // The exhausted (shorter) list is the base; the other maps one of
          // its sub-objects.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    // The record layout fixes the declaration order of the fields, which is
    // the order the overlapped sub-objects must be emitted in.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      if (const auto *CRD =
              VD->getType().getCanonicalType()->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            // Skip the common prefix of the two component lists.
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // Otherwise order by the declaration order of the first differing
            // fields (same record: by field index; otherwise: by position in
            // the plain layout computed above).
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      bool IsFirstComponentList = true;
      generateInfoForComponentList(MapType, MapModifiers, Components,
                                   BasePointers, Pointers, Sizes, Types,
                                   PartialStruct, IsFirstComponentList,
                                   IsImplicit, OverlappedComponents);
    }
    // Go through other elements without overlapped elements.
    bool IsFirstComponentList = OverlappedData.empty();
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, Components,
                                     BasePointers, Pointers, Sizes, Types,
                                     PartialStruct, IsFirstComponentList,
                                     IsImplicit);
      IsFirstComponentList = false;
    }
  }
8251 
8252   /// Generate the base pointers, section pointers, sizes and map types
8253   /// associated with the declare target link variables.
8254   void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
8255                                         MapValuesArrayTy &Pointers,
8256                                         MapValuesArrayTy &Sizes,
8257                                         MapFlagsArrayTy &Types) const {
8258     // Map other list items in the map clause which are not captured variables
8259     // but "declare target link" global variables.
8260     for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) {
8261       for (const auto &L : C->component_lists()) {
8262         if (!L.first)
8263           continue;
8264         const auto *VD = dyn_cast<VarDecl>(L.first);
8265         if (!VD)
8266           continue;
8267         llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
8268             OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
8269         if (!Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
8270           continue;
8271         StructRangeInfoTy PartialStruct;
8272         generateInfoForComponentList(
8273             C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
8274             Pointers, Sizes, Types, PartialStruct,
8275             /*IsFirstComponentList=*/true, C->isImplicit());
8276         assert(!PartialStruct.Base.isValid() &&
8277                "No partial structs for declare target link expected.");
8278       }
8279     }
8280   }
8281 
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  ///
  /// Used when a captured value needs an implicit map entry. Appends exactly
  /// one entry to \a CurBasePointers / \a CurPointers / \a CurSizes /
  /// \a CurMapTypes, always tagged TARGET_PARAM and IMPLICIT at the end.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapBaseValuesArrayTy &CurBasePointers,
                              MapValuesArrayTy &CurPointers,
                              MapValuesArrayTy &CurSizes,
                              MapFlagsArrayTy &CurMapTypes) const {
    // Do the default mapping.
    if (CI.capturesThis()) {
      // 'this' is mapped as the object it points to, sized by the pointee
      // type of the capture field.
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType()));
      // Default map type.
      CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CurMapTypes.push_back(OMP_MAP_LITERAL);
        CurSizes.push_back(CGF.getTypeSize(RI.getType()));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CurMapTypes.push_back(OMP_MAP_NONE);
        CurSizes.push_back(llvm::Constant::getNullValue(CGF.SizeTy));
      }
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CurSizes.push_back(CGF.getTypeSize(ElementType));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      if (FirstPrivateDecls.count(VD) &&
          VD->getType().isConstant(CGF.getContext())) {
        // Constant firstprivate variables get a registered global copy; copy
        // the host value into it and map the copy instead of the original.
        llvm::Constant *Addr =
            CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
        // Copy the value of the original variable to the new global copy.
        CGF.Builder.CreateMemCpy(
            CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(),
            Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
            CurSizes.back(), /*isVolatile=*/false);
        // Use new global variable as the base pointers.
        CurBasePointers.push_back(Addr);
        CurPointers.push_back(Addr);
      } else {
        CurBasePointers.push_back(CV);
        if (FirstPrivateDecls.count(VD) && ElementType->isAnyPointerType()) {
          // For firstprivate pointers, the pointer value itself is loaded
          // through the reference and used as the section pointer.
          Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
              CV, ElementType, CGF.getContext().getDeclAlign(VD),
              AlignmentSource::Decl));
          CurPointers.push_back(PtrAddr.getPointer());
        } else {
          CurPointers.push_back(CV);
        }
      }
    }
    // Every default map produces a single argument which is a target parameter.
    CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    CurMapTypes.back() |= OMP_MAP_IMPLICIT;
  }
8352 };
8353 
/// Device IDs with a reserved meaning for the offloading runtime calls
/// emitted in this file.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
8359 } // anonymous namespace
8360 
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
///
/// Fills \a Info with the emitted arrays: base pointers and section pointers
/// are stack temporaries stored element-by-element; map types (and sizes,
/// when all sizes are compile-time constants) become private constant
/// globals.
static void
emitOffloadingArrays(CodeGenFunction &CGF,
                     MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
                     MappableExprsHandler::MapValuesArrayTy &Pointers,
                     MappableExprsHandler::MapValuesArrayTy &Sizes,
                     MappableExprsHandler::MapFlagsArrayTy &MapTypes,
                     CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType =
        Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0);

    // Stack temporaries, filled one element at a time in the loop below.
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Ctx.getSizeType(), PointerNumAP, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (llvm::Value *S : Sizes)
        ConstSizes.push_back(cast<llvm::Constant>(S));

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
    llvm::copy(MapTypes, Mapping.begin());
    llvm::Constant *MapTypesArrayInit =
        llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
        CGM.getModule(), MapTypesArrayInit->getType(),
        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
        MapTypesArrayInit, MaptypesName);
    MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
    Info.MapTypesArray = MapTypesArrayGbl;

    // Fill the base-pointer and pointer arrays (and the sizes array when it
    // could not be made a constant global).
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Record where the device pointer for this declaration is stored so it
      // can be retrieved after the runtime call fills it in.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType()));
        CGF.Builder.CreateStore(
            CGF.Builder.CreateIntCast(Sizes[I], CGM.SizeTy, /*isSigned=*/true),
            SAddr);
      }
    }
  }
}
8477 /// Emit the arguments to be passed to the runtime library based on the
8478 /// arrays of pointers, sizes and map types.
8479 static void emitOffloadingArraysArgument(
8480     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
8481     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
8482     llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
8483   CodeGenModule &CGM = CGF.CGM;
8484   if (Info.NumberOfPtrs) {
8485     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8486         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8487         Info.BasePointersArray,
8488         /*Idx0=*/0, /*Idx1=*/0);
8489     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8490         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8491         Info.PointersArray,
8492         /*Idx0=*/0,
8493         /*Idx1=*/0);
8494     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8495         llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), Info.SizesArray,
8496         /*Idx0=*/0, /*Idx1=*/0);
8497     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8498         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8499         Info.MapTypesArray,
8500         /*Idx0=*/0,
8501         /*Idx1=*/0);
8502   } else {
8503     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8504     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8505     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo());
8506     MapTypesArrayArg =
8507         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8508   }
8509 }
8510 
/// Check for inner distribute directive.
///
/// Returns the distribute directive nested inside \a D, or nullptr if there
/// is none. For a plain 'target', a distribute may also sit one level deeper
/// inside a nested 'teams' region. Only 'target' and 'target teams' can have
/// a nested distribute; the combined 'target ... distribute' forms never
/// reach here (the caller handles them directly) and everything else is
/// unreachable.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  // Only a region whose body is a single (possibly compound-wrapped)
  // directive is considered.
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      if (DKind == OMPD_teams) {
        // 'target' + 'teams' + 'distribute...': look one level deeper.
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    // Target forms that cannot contain a nested distribute directive.
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      return nullptr;
    // Any other directive kind is not expected as the outer directive here.
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}
8606 
8607 void CGOpenMPRuntime::emitTargetNumIterationsCall(
8608     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device,
8609     const llvm::function_ref<llvm::Value *(
8610         CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter) {
8611   OpenMPDirectiveKind Kind = D.getDirectiveKind();
8612   const OMPExecutableDirective *TD = &D;
8613   // Get nested teams distribute kind directive, if any.
8614   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
8615     TD = getNestedDistributeDirective(CGM.getContext(), D);
8616   if (!TD)
8617     return;
8618   const auto *LD = cast<OMPLoopDirective>(TD);
8619   auto &&CodeGen = [LD, &Device, &SizeEmitter, this](CodeGenFunction &CGF,
8620                                                      PrePostActionTy &) {
8621     llvm::Value *NumIterations = SizeEmitter(CGF, *LD);
8622 
8623     // Emit device ID if any.
8624     llvm::Value *DeviceID;
8625     if (Device)
8626       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
8627                                            CGF.Int64Ty, /*isSigned=*/true);
8628     else
8629       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
8630 
8631     llvm::Value *Args[] = {DeviceID, NumIterations};
8632     CGF.EmitRuntimeCall(
8633         createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
8634   };
8635   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
8636 }
8637 
void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     llvm::Function *OutlinedFn,
                                     llvm::Value *OutlinedFnID,
                                     const Expr *IfCond, const Expr *Device) {
  // Emits the offloading sequence for a target directive \p D: build the map
  // arrays, call __tgt_target[_teams][_nowait], and fall back to the host
  // version (\p OutlinedFn) if the offload fails or is disabled.
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // A 'depend' clause forces the whole offload to be wrapped in a task, so
  // captured variables must be re-captured inside the task body (see the
  // RequiresOuterTask checks below).
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // InputInfo and MapTypesArray are filled by TargetThenGen (below) before it
  // invokes ThenGen, which reads them through these by-reference captures.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask,
                    &CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) {
    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
                                          : OMPRTL__tgt_target_teams),
          OffloadingArgs);
    } else {
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
                                          : OMPRTL__tgt_target),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    // A non-zero return value means the offload failed and the region must
    // be run on the host instead.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      // Inside the generated task the captures must be re-evaluated.
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      // Inside the generated task the captures must be re-evaluated.
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;

    // Walk captures, record decls, and captured values in lock step; they are
    // parallel sequences describing the same captures.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
      MappableExprsHandler::MapValuesArrayTy CurPointers;
      MappableExprsHandler::MapValuesArrayTy CurSizes;
      MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurBasePointers.push_back(*CV);
        CurPointers.push_back(*CV);
        CurSizes.push_back(CGF.getTypeSize(RI->getType()));
        // Copy to the device as an argument. No need to retrieve it.
        CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                              MappableExprsHandler::OMP_MAP_TARGET_PARAM);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
                                         CurSizes, CurMapTypes, PartialStruct);
        if (CurBasePointers.empty())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
                                           CurPointers, CurSizes, CurMapTypes);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(
              CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
              CurMapTypes, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert(!CurBasePointers.empty() &&
             "Non-existing map pointer for capture!");
      assert(CurBasePointers.size() == CurPointers.size() &&
             CurBasePointers.size() == CurSizes.size() &&
             CurBasePointers.size() == CurMapTypes.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
                                    CurMapTypes, PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
                                              Pointers, MapTypes);
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
                                               MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Publish the arrays for ThenGen via the captured-by-reference InputInfo
    // and MapTypesArray before dispatching it.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
8908 
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  // Recursively walks the statement tree under \p S and emits a device
  // function for every target execution directive found. \p ParentName is the
  // mangled name of the enclosing host function, used to build the unique
  // offload entry name.
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the directive-specific device-function emitter.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // None of the remaining directives is a target execution directive, so
    // RequiresDeviceCodegen guarantees they cannot be reached here. The cases
    // are listed explicitly (rather than 'default') so that adding a new
    // directive kind triggers a -Wswitch warning.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // Non-target OpenMP directives: descend into the associated statement only.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(
        E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
9046 
9047 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9048   // If emitting code for the host, we do not process FD here. Instead we do
9049   // the normal code generation.
9050   if (!CGM.getLangOpts().OpenMPIsDevice)
9051     return false;
9052 
9053   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9054   StringRef Name = CGM.getMangledName(GD);
9055   // Try to detect target regions in the function.
9056   if (const auto *FD = dyn_cast<FunctionDecl>(VD))
9057     scanForTargetRegionsFunctions(FD->getBody(), Name);
9058 
9059   // Do not to emit function if it is not marked as declare target.
9060   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9061          AlreadyEmittedTargetFunctions.count(Name) == 0;
9062 }
9063 
9064 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9065   if (!CGM.getLangOpts().OpenMPIsDevice)
9066     return false;
9067 
9068   // Check if there are Ctors/Dtors in this declaration and look for target
9069   // regions in it. We use the complete variant to produce the kernel name
9070   // mangling.
9071   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9072   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9073     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9074       StringRef ParentName =
9075           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9076       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9077     }
9078     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9079       StringRef ParentName =
9080           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9081       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9082     }
9083   }
9084 
9085   // Do not to emit variable if it is not marked as declare target.
9086   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9087       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9088           cast<VarDecl>(GD.getDecl()));
9089   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link) {
9090     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9091     return true;
9092   }
9093   return false;
9094 }
9095 
9096 llvm::Constant *
9097 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
9098                                                 const VarDecl *VD) {
9099   assert(VD->getType().isConstant(CGM.getContext()) &&
9100          "Expected constant variable.");
9101   StringRef VarName;
9102   llvm::Constant *Addr;
9103   llvm::GlobalValue::LinkageTypes Linkage;
9104   QualType Ty = VD->getType();
9105   SmallString<128> Buffer;
9106   {
9107     unsigned DeviceID;
9108     unsigned FileID;
9109     unsigned Line;
9110     getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
9111                              FileID, Line);
9112     llvm::raw_svector_ostream OS(Buffer);
9113     OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
9114        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
9115     VarName = OS.str();
9116   }
9117   Linkage = llvm::GlobalValue::InternalLinkage;
9118   Addr =
9119       getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
9120                                   getDefaultFirstprivateAddressSpace());
9121   cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
9122   CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
9123   CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
9124   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9125       VarName, Addr, VarSize,
9126       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
9127   return Addr;
9128 }
9129 
9130 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
9131                                                    llvm::Constant *Addr) {
9132   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9133       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9134   if (!Res) {
9135     if (CGM.getLangOpts().OpenMPIsDevice) {
9136       // Register non-target variables being emitted in device code (debug info
9137       // may cause this).
9138       StringRef VarName = CGM.getMangledName(VD);
9139       EmittedNonTargetVariables.try_emplace(VarName, Addr);
9140     }
9141     return;
9142   }
9143   // Register declare target variables.
9144   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
9145   StringRef VarName;
9146   CharUnits VarSize;
9147   llvm::GlobalValue::LinkageTypes Linkage;
9148   switch (*Res) {
9149   case OMPDeclareTargetDeclAttr::MT_To:
9150     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9151     VarName = CGM.getMangledName(VD);
9152     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
9153       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
9154       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
9155     } else {
9156       VarSize = CharUnits::Zero();
9157     }
9158     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
9159     // Temp solution to prevent optimizations of the internal variables.
9160     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
9161       std::string RefName = getName({VarName, "ref"});
9162       if (!CGM.GetGlobalValue(RefName)) {
9163         llvm::Constant *AddrRef =
9164             getOrCreateInternalVariable(Addr->getType(), RefName);
9165         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
9166         GVAddrRef->setConstant(/*Val=*/true);
9167         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
9168         GVAddrRef->setInitializer(Addr);
9169         CGM.addCompilerUsedGlobal(GVAddrRef);
9170       }
9171     }
9172     break;
9173   case OMPDeclareTargetDeclAttr::MT_Link:
9174     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
9175     if (CGM.getLangOpts().OpenMPIsDevice) {
9176       VarName = Addr->getName();
9177       Addr = nullptr;
9178     } else {
9179       VarName = getAddrOfDeclareTargetLink(VD).getName();
9180       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetLink(VD).getPointer());
9181     }
9182     VarSize = CGM.getPointerSize();
9183     Linkage = llvm::GlobalValue::WeakAnyLinkage;
9184     break;
9185   }
9186   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9187       VarName, Addr, VarSize, Flags, Linkage);
9188 }
9189 
9190 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
9191   if (isa<FunctionDecl>(GD.getDecl()) ||
9192       isa<OMPDeclareReductionDecl>(GD.getDecl()))
9193     return emitTargetFunctions(GD);
9194 
9195   return emitTargetGlobalVariable(GD);
9196 }
9197 
9198 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
9199   for (const VarDecl *VD : DeferredGlobalVariables) {
9200     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9201         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9202     if (!Res)
9203       continue;
9204     if (*Res == OMPDeclareTargetDeclAttr::MT_To) {
9205       CGM.EmitGlobal(VD);
9206     } else {
9207       assert(*Res == OMPDeclareTargetDeclAttr::MT_Link &&
9208              "Expected to or link clauses.");
9209       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD);
9210     }
9211   }
9212 }
9213 
9214 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
9215     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
9216   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
9217          " Expected target-based directive.");
9218 }
9219 
9220 void CGOpenMPRuntime::checkArchForUnifiedAddressing(
9221     const OMPRequiresDecl *D) {
9222   for (const OMPClause *Clause : D->clauselists()) {
9223     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
9224       HasRequiresUnifiedSharedMemory = true;
9225       break;
9226     }
9227   }
9228 }
9229 
9230 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
9231                                                        LangAS &AS) {
9232   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
9233     return false;
9234   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
9235   switch(A->getAllocatorType()) {
9236   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
9237   // Not supported, fallback to the default mem space.
9238   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
9239   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
9240   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
9241   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
9242   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
9243   case OMPAllocateDeclAttr::OMPConstMemAlloc:
9244   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
9245     AS = LangAS::Default;
9246     return true;
9247   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
9248     llvm_unreachable("Expected predefined allocator for the variables with the "
9249                      "static storage.");
9250   }
9251   return false;
9252 }
9253 
9254 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
9255     CodeGenModule &CGM)
9256     : CGM(CGM) {
9257   if (CGM.getLangOpts().OpenMPIsDevice) {
9258     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
9259     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
9260   }
9261 }
9262 
9263 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
9264   if (CGM.getLangOpts().OpenMPIsDevice)
9265     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
9266 }
9267 
bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  // On the host, or while auto-marking is disabled (see
  // DisableAutoDeclareTargetRAII), report "already handled".
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  StringRef Name = CGM.getMangledName(GD);
  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target as it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetFunctions.count(Name) == 0) {
      // If an llvm::Function already exists for this name, it still needs
      // emission only while it remains a bare declaration.
      if (auto *F = dyn_cast_or_null<llvm::Function>(CGM.GetGlobalValue(Name)))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  // Record this function as handled; insert().second is false when the name
  // was already present, i.e. it was emitted before.
  return !AlreadyEmittedTargetFunctions.insert(Name).second;
}
9287 
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // Builds the host-side constructor-like function that forwards the
  // 'requires' flags of this translation unit to the runtime via
  // __tgt_register_requires. Returns nullptr when no registration is needed.
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    // The function takes no arguments and returns void.
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires),
        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}
9328 
9329 llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
9330   // If we have offloading in the current module, we need to emit the entries
9331   // now and register the offloading descriptor.
9332   createOffloadEntriesAndInfoMetadata();
9333 
9334   // Create and register the offloading binary descriptors. This is the main
9335   // entity that captures all the information about offloading in the current
9336   // compilation unit.
9337   return createOffloadingBinaryDescriptorRegistration();
9338 }
9339 
9340 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
9341                                     const OMPExecutableDirective &D,
9342                                     SourceLocation Loc,
9343                                     llvm::Function *OutlinedFn,
9344                                     ArrayRef<llvm::Value *> CapturedVars) {
9345   if (!CGF.HaveInsertPoint())
9346     return;
9347 
9348   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9349   CodeGenFunction::RunCleanupsScope Scope(CGF);
9350 
9351   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
9352   llvm::Value *Args[] = {
9353       RTLoc,
9354       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
9355       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
9356   llvm::SmallVector<llvm::Value *, 16> RealArgs;
9357   RealArgs.append(std::begin(Args), std::end(Args));
9358   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
9359 
9360   llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
9361   CGF.EmitRuntimeCall(RTLFn, RealArgs);
9362 }
9363 
9364 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
9365                                          const Expr *NumTeams,
9366                                          const Expr *ThreadLimit,
9367                                          SourceLocation Loc) {
9368   if (!CGF.HaveInsertPoint())
9369     return;
9370 
9371   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9372 
9373   llvm::Value *NumTeamsVal =
9374       NumTeams
9375           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
9376                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
9377           : CGF.Builder.getInt32(0);
9378 
9379   llvm::Value *ThreadLimitVal =
9380       ThreadLimit
9381           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
9382                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
9383           : CGF.Builder.getInt32(0);
9384 
9385   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
9386   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
9387                                      ThreadLimitVal};
9388   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
9389                       PushNumTeamsArgs);
9390 }
9391 
// Emits the begin/end runtime calls that bracket a 'target data' region
// (__tgt_target_data_begin / __tgt_target_data_end), honoring the optional
// 'if' and 'device' clauses. When device pointers must be privatized the
// region body is emitted twice (with and without privatization); otherwise it
// is emitted once between the two runtime calls.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MCHandler(D, CGF);
    MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any. Without a 'device' clause, OMP_DEVICEID_UNDEF
    // is passed instead.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin),
                        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region. Reuses the offloading
  // arrays captured in Info by the opening lambda above.
  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end),
                        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  // Open the data environment (guarded by the 'if' clause when present).
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  // Close the data environment (again guarded by the 'if' clause).
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
9518 
// Emits the single runtime call for a standalone target data directive:
// 'target enter data', 'target exit data' or 'target update'. The call is
// guarded by the 'if' clause and, when a 'depend' clause is present, the data
// motion is emitted as a target task.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Generate the code for the opening of the data environment. InputInfo and
  // MapTypesArray are captured by reference: they are filled in later by
  // TargetThenGen before this lambda runs.
  auto &&ThenGen = [this, &D, Device, &InputInfo,
                    &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any. Without a 'device' clause, OMP_DEVICEID_UNDEF
    // is passed instead.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    llvm::Value *OffloadingArgs[] = {DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray};

    // Select the right runtime function call for each expected standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    OpenMPRTLFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
                        : OMPRTL__tgt_target_data_begin;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
                        : OMPRTL__tgt_target_data_end;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
                        : OMPRTL__tgt_target_data_update;
      break;
    // All remaining directive kinds are invalid here (the assertion above
    // rules them out); they are listed explicitly so the switch stays fully
    // covered and new directive kinds produce a compiler warning.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
  };

  // Builds the offloading arrays from the map clauses and then emits ThenGen,
  // either inlined or wrapped in a target task when a 'depend' clause exists.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Publish the array arguments through the by-reference captures consumed
    // by ThenGen.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (D.hasClausesOfKind<OMPDependClause>())
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // Nothing is emitted when the 'if' clause evaluates to false.
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, TargetThenGen,
                    [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
9671 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    /// Mangling classification; parameters default to 'vector'.
    ParamKindTy Kind = Vector;
    /// Linear step value for Linear, or (presumably) the position of the
    /// variable stride for LinearWithVarStride; evaluates to false when
    /// unset — TODO(review): confirm against the code that fills this in.
    llvm::APSInt StrideOrArg;
    /// Alignment from the 'aligned' clause; evaluates to false when absent.
    llvm::APSInt Alignment;
  };
} // namespace
9682 
9683 static unsigned evaluateCDTSize(const FunctionDecl *FD,
9684                                 ArrayRef<ParamAttrTy> ParamAttrs) {
9685   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
9686   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
9687   // of that clause. The VLEN value must be power of 2.
9688   // In other case the notion of the function`s "characteristic data type" (CDT)
9689   // is used to compute the vector length.
9690   // CDT is defined in the following order:
9691   //   a) For non-void function, the CDT is the return type.
9692   //   b) If the function has any non-uniform, non-linear parameters, then the
9693   //   CDT is the type of the first such parameter.
9694   //   c) If the CDT determined by a) or b) above is struct, union, or class
9695   //   type which is pass-by-value (except for the type that maps to the
9696   //   built-in complex data type), the characteristic data type is int.
9697   //   d) If none of the above three cases is applicable, the CDT is int.
9698   // The VLEN is then determined based on the CDT and the size of vector
9699   // register of that ISA for which current vector version is generated. The
9700   // VLEN is computed using the formula below:
9701   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
9702   // where vector register size specified in section 3.2.1 Registers and the
9703   // Stack Frame of original AMD64 ABI document.
9704   QualType RetType = FD->getReturnType();
9705   if (RetType.isNull())
9706     return 0;
9707   ASTContext &C = FD->getASTContext();
9708   QualType CDT;
9709   if (!RetType.isNull() && !RetType->isVoidType()) {
9710     CDT = RetType;
9711   } else {
9712     unsigned Offset = 0;
9713     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
9714       if (ParamAttrs[Offset].Kind == Vector)
9715         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
9716       ++Offset;
9717     }
9718     if (CDT.isNull()) {
9719       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
9720         if (ParamAttrs[I + Offset].Kind == Vector) {
9721           CDT = FD->getParamDecl(I)->getType();
9722           break;
9723         }
9724       }
9725     }
9726   }
9727   if (CDT.isNull())
9728     CDT = C.IntTy;
9729   CDT = CDT->getCanonicalTypeUnqualified();
9730   if (CDT->isRecordType() || CDT->isUnionType())
9731     CDT = C.IntTy;
9732   return C.getTypeSize(CDT);
9733 }
9734 
9735 static void
9736 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
9737                            const llvm::APSInt &VLENVal,
9738                            ArrayRef<ParamAttrTy> ParamAttrs,
9739                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
9740   struct ISADataTy {
9741     char ISA;
9742     unsigned VecRegSize;
9743   };
9744   ISADataTy ISAData[] = {
9745       {
9746           'b', 128
9747       }, // SSE
9748       {
9749           'c', 256
9750       }, // AVX
9751       {
9752           'd', 256
9753       }, // AVX2
9754       {
9755           'e', 512
9756       }, // AVX512
9757   };
9758   llvm::SmallVector<char, 2> Masked;
9759   switch (State) {
9760   case OMPDeclareSimdDeclAttr::BS_Undefined:
9761     Masked.push_back('N');
9762     Masked.push_back('M');
9763     break;
9764   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
9765     Masked.push_back('N');
9766     break;
9767   case OMPDeclareSimdDeclAttr::BS_Inbranch:
9768     Masked.push_back('M');
9769     break;
9770   }
9771   for (char Mask : Masked) {
9772     for (const ISADataTy &Data : ISAData) {
9773       SmallString<256> Buffer;
9774       llvm::raw_svector_ostream Out(Buffer);
9775       Out << "_ZGV" << Data.ISA << Mask;
9776       if (!VLENVal) {
9777         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
9778         assert(NumElts && "Non-zero simdlen/cdtsize expected");
9779         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
9780       } else {
9781         Out << VLENVal;
9782       }
9783       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
9784         switch (ParamAttr.Kind){
9785         case LinearWithVarStride:
9786           Out << 's' << ParamAttr.StrideOrArg;
9787           break;
9788         case Linear:
9789           Out << 'l';
9790           if (!!ParamAttr.StrideOrArg)
9791             Out << ParamAttr.StrideOrArg;
9792           break;
9793         case Uniform:
9794           Out << 'u';
9795           break;
9796         case Vector:
9797           Out << 'v';
9798           break;
9799         }
9800         if (!!ParamAttr.Alignment)
9801           Out << 'a' << ParamAttr.Alignment;
9802       }
9803       Out << '_' << Fn->getName();
9804       Fn->addFnAttr(Out.str());
9805     }
9806   }
9807 }
9808 
// These are the functions needed to mangle the names of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
9814 
9815 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
9816 ///
9817 /// TODO: Need to implement the behavior for reference marked with a
9818 /// var or no linear modifiers (1.b in the section). For this, we
9819 /// need to extend ParamKindTy to support the linear modifiers.
9820 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
9821   QT = QT.getCanonicalType();
9822 
9823   if (QT->isVoidType())
9824     return false;
9825 
9826   if (Kind == ParamKindTy::Uniform)
9827     return false;
9828 
9829   if (Kind == ParamKindTy::Linear)
9830     return false;
9831 
9832   // TODO: Handle linear references with modifiers
9833 
9834   if (Kind == ParamKindTy::LinearWithVarStride)
9835     return false;
9836 
9837   return true;
9838 }
9839 
9840 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
9841 static bool getAArch64PBV(QualType QT, ASTContext &C) {
9842   QT = QT.getCanonicalType();
9843   unsigned Size = C.getTypeSize(QT);
9844 
9845   // Only scalars and complex within 16 bytes wide set PVB to true.
9846   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
9847     return false;
9848 
9849   if (QT->isFloatingType())
9850     return true;
9851 
9852   if (QT->isIntegerType())
9853     return true;
9854 
9855   if (QT->isPointerType())
9856     return true;
9857 
9858   // TODO: Add support for complex types (section 3.1.2, item 2).
9859 
9860   return false;
9861 }
9862 
9863 /// Computes the lane size (LS) of a return type or of an input parameter,
9864 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
9865 /// TODO: Add support for references, section 3.2.1, item 1.
9866 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
9867   if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
9868     QualType PTy = QT.getCanonicalType()->getPointeeType();
9869     if (getAArch64PBV(PTy, C))
9870       return C.getTypeSize(PTy);
9871   }
9872   if (getAArch64PBV(QT, C))
9873     return C.getTypeSize(QT);
9874 
9875   return C.getTypeSize(C.getUIntPtrType());
9876 }
9877 
9878 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
9879 // signature of the scalar function, as defined in 3.2.2 of the
9880 // AAVFABI.
9881 static std::tuple<unsigned, unsigned, bool>
9882 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
9883   QualType RetType = FD->getReturnType().getCanonicalType();
9884 
9885   ASTContext &C = FD->getASTContext();
9886 
9887   bool OutputBecomesInput = false;
9888 
9889   llvm::SmallVector<unsigned, 8> Sizes;
9890   if (!RetType->isVoidType()) {
9891     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
9892     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
9893       OutputBecomesInput = true;
9894   }
9895   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
9896     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
9897     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
9898   }
9899 
9900   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
9901   // The LS of a function parameter / return value can only be a power
9902   // of 2, starting from 8 bits, up to 128.
9903   assert(std::all_of(Sizes.begin(), Sizes.end(),
9904                      [](unsigned Size) {
9905                        return Size == 8 || Size == 16 || Size == 32 ||
9906                               Size == 64 || Size == 128;
9907                      }) &&
9908          "Invalid size");
9909 
9910   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
9911                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
9912                          OutputBecomesInput);
9913 }
9914 
9915 /// Mangle the parameter part of the vector function name according to
9916 /// their OpenMP classification. The mangling function is defined in
9917 /// section 3.5 of the AAVFABI.
9918 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
9919   SmallString<256> Buffer;
9920   llvm::raw_svector_ostream Out(Buffer);
9921   for (const auto &ParamAttr : ParamAttrs) {
9922     switch (ParamAttr.Kind) {
9923     case LinearWithVarStride:
9924       Out << "ls" << ParamAttr.StrideOrArg;
9925       break;
9926     case Linear:
9927       Out << 'l';
9928       // Don't print the step value if it is not present or if it is
9929       // equal to 1.
9930       if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1)
9931         Out << ParamAttr.StrideOrArg;
9932       break;
9933     case Uniform:
9934       Out << 'u';
9935       break;
9936     case Vector:
9937       Out << 'v';
9938       break;
9939     }
9940 
9941     if (!!ParamAttr.Alignment)
9942       Out << 'a' << ParamAttr.Alignment;
9943   }
9944 
9945   return Out.str();
9946 }
9947 
9948 // Function used to add the attribute. The parameter `VLEN` is
9949 // templated to allow the use of "x" when targeting scalable functions
9950 // for SVE.
// Appends one vector-function name attribute of the form
// "<Prefix><ISA><LMask><VLEN>[v]<ParSeq>_<MangledName>" to Fn. VLEN is
// templated so callers can pass either a number or "x" (SVE scalable).
template <typename T>
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
                                 char ISA, StringRef ParSeq,
                                 StringRef MangledName, bool OutputBecomesInput,
                                 llvm::Function *Fn) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Prefix << ISA << LMask << VLEN;
  // When the return value becomes an input, it mangles as an extra 'v'
  // (vector) parameter right after the VLEN.
  if (OutputBecomesInput)
    Out << "v";
  Out << ParSeq << "_" << MangledName;
  Fn->addFnAttr(Out.str());
}
9964 
9965 // Helper function to generate the Advanced SIMD names depending on
9966 // the value of the NDS when simdlen is not present.
9967 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
9968                                       StringRef Prefix, char ISA,
9969                                       StringRef ParSeq, StringRef MangledName,
9970                                       bool OutputBecomesInput,
9971                                       llvm::Function *Fn) {
9972   switch (NDS) {
9973   case 8:
9974     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
9975                          OutputBecomesInput, Fn);
9976     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
9977                          OutputBecomesInput, Fn);
9978     break;
9979   case 16:
9980     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
9981                          OutputBecomesInput, Fn);
9982     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
9983                          OutputBecomesInput, Fn);
9984     break;
9985   case 32:
9986     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
9987                          OutputBecomesInput, Fn);
9988     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
9989                          OutputBecomesInput, Fn);
9990     break;
9991   case 64:
9992   case 128:
9993     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
9994                          OutputBecomesInput, Fn);
9995     break;
9996   default:
9997     llvm_unreachable("Scalar type is too wide.");
9998   }
9999 }
10000 
10001 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
10002 static void emitAArch64DeclareSimdFunction(
10003     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
10004     ArrayRef<ParamAttrTy> ParamAttrs,
10005     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
10006     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
10007 
10008   // Get basic data for building the vector signature.
10009   const auto Data = getNDSWDS(FD, ParamAttrs);
10010   const unsigned NDS = std::get<0>(Data);
10011   const unsigned WDS = std::get<1>(Data);
10012   const bool OutputBecomesInput = std::get<2>(Data);
10013 
10014   // Check the values provided via `simdlen` by the user.
10015   // 1. A `simdlen(1)` doesn't produce vector signatures,
10016   if (UserVLEN == 1) {
10017     unsigned DiagID = CGM.getDiags().getCustomDiagID(
10018         DiagnosticsEngine::Warning,
10019         "The clause simdlen(1) has no effect when targeting aarch64.");
10020     CGM.getDiags().Report(SLoc, DiagID);
10021     return;
10022   }
10023 
10024   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
10025   // Advanced SIMD output.
10026   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
10027     unsigned DiagID = CGM.getDiags().getCustomDiagID(
10028         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
10029                                     "power of 2 when targeting Advanced SIMD.");
10030     CGM.getDiags().Report(SLoc, DiagID);
10031     return;
10032   }
10033 
10034   // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
10035   // limits.
10036   if (ISA == 's' && UserVLEN != 0) {
10037     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
10038       unsigned DiagID = CGM.getDiags().getCustomDiagID(
10039           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
10040                                       "lanes in the architectural constraints "
10041                                       "for SVE (min is 128-bit, max is "
10042                                       "2048-bit, by steps of 128-bit)");
10043       CGM.getDiags().Report(SLoc, DiagID) << WDS;
10044       return;
10045     }
10046   }
10047 
10048   // Sort out parameter sequence.
10049   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
10050   StringRef Prefix = "_ZGV";
10051   // Generate simdlen from user input (if any).
10052   if (UserVLEN) {
10053     if (ISA == 's') {
10054       // SVE generates only a masked function.
10055       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10056                            OutputBecomesInput, Fn);
10057     } else {
10058       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10059       // Advanced SIMD generates one or two functions, depending on
10060       // the `[not]inbranch` clause.
10061       switch (State) {
10062       case OMPDeclareSimdDeclAttr::BS_Undefined:
10063         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10064                              OutputBecomesInput, Fn);
10065         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10066                              OutputBecomesInput, Fn);
10067         break;
10068       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10069         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10070                              OutputBecomesInput, Fn);
10071         break;
10072       case OMPDeclareSimdDeclAttr::BS_Inbranch:
10073         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10074                              OutputBecomesInput, Fn);
10075         break;
10076       }
10077     }
10078   } else {
10079     // If no user simdlen is provided, follow the AAVFABI rules for
10080     // generating the vector length.
10081     if (ISA == 's') {
10082       // SVE, section 3.4.1, item 1.
10083       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
10084                            OutputBecomesInput, Fn);
10085     } else {
10086       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10087       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
10088       // two vector names depending on the use of the clause
10089       // `[not]inbranch`.
10090       switch (State) {
10091       case OMPDeclareSimdDeclAttr::BS_Undefined:
10092         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10093                                   OutputBecomesInput, Fn);
10094         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10095                                   OutputBecomesInput, Fn);
10096         break;
10097       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10098         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10099                                   OutputBecomesInput, Fn);
10100         break;
10101       case OMPDeclareSimdDeclAttr::BS_Inbranch:
10102         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10103                                   OutputBecomesInput, Fn);
10104         break;
10105       }
10106     }
10107   }
10108 }
10109 
10110 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
10111                                               llvm::Function *Fn) {
10112   ASTContext &C = CGM.getContext();
10113   FD = FD->getMostRecentDecl();
10114   // Map params to their positions in function decl.
10115   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
10116   if (isa<CXXMethodDecl>(FD))
10117     ParamPositions.try_emplace(FD, 0);
10118   unsigned ParamPos = ParamPositions.size();
10119   for (const ParmVarDecl *P : FD->parameters()) {
10120     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
10121     ++ParamPos;
10122   }
10123   while (FD) {
10124     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
10125       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
10126       // Mark uniform parameters.
10127       for (const Expr *E : Attr->uniforms()) {
10128         E = E->IgnoreParenImpCasts();
10129         unsigned Pos;
10130         if (isa<CXXThisExpr>(E)) {
10131           Pos = ParamPositions[FD];
10132         } else {
10133           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10134                                 ->getCanonicalDecl();
10135           Pos = ParamPositions[PVD];
10136         }
10137         ParamAttrs[Pos].Kind = Uniform;
10138       }
10139       // Get alignment info.
10140       auto NI = Attr->alignments_begin();
10141       for (const Expr *E : Attr->aligneds()) {
10142         E = E->IgnoreParenImpCasts();
10143         unsigned Pos;
10144         QualType ParmTy;
10145         if (isa<CXXThisExpr>(E)) {
10146           Pos = ParamPositions[FD];
10147           ParmTy = E->getType();
10148         } else {
10149           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10150                                 ->getCanonicalDecl();
10151           Pos = ParamPositions[PVD];
10152           ParmTy = PVD->getType();
10153         }
10154         ParamAttrs[Pos].Alignment =
10155             (*NI)
10156                 ? (*NI)->EvaluateKnownConstInt(C)
10157                 : llvm::APSInt::getUnsigned(
10158                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
10159                           .getQuantity());
10160         ++NI;
10161       }
10162       // Mark linear parameters.
10163       auto SI = Attr->steps_begin();
10164       auto MI = Attr->modifiers_begin();
10165       for (const Expr *E : Attr->linears()) {
10166         E = E->IgnoreParenImpCasts();
10167         unsigned Pos;
10168         if (isa<CXXThisExpr>(E)) {
10169           Pos = ParamPositions[FD];
10170         } else {
10171           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10172                                 ->getCanonicalDecl();
10173           Pos = ParamPositions[PVD];
10174         }
10175         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
10176         ParamAttr.Kind = Linear;
10177         if (*SI) {
10178           Expr::EvalResult Result;
10179           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
10180             if (const auto *DRE =
10181                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
10182               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
10183                 ParamAttr.Kind = LinearWithVarStride;
10184                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
10185                     ParamPositions[StridePVD->getCanonicalDecl()]);
10186               }
10187             }
10188           } else {
10189             ParamAttr.StrideOrArg = Result.Val.getInt();
10190           }
10191         }
10192         ++SI;
10193         ++MI;
10194       }
10195       llvm::APSInt VLENVal;
10196       SourceLocation ExprLoc;
10197       const Expr *VLENExpr = Attr->getSimdlen();
10198       if (VLENExpr) {
10199         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
10200         ExprLoc = VLENExpr->getExprLoc();
10201       }
10202       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
10203       if (CGM.getTriple().getArch() == llvm::Triple::x86 ||
10204           CGM.getTriple().getArch() == llvm::Triple::x86_64) {
10205         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
10206       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
10207         unsigned VLEN = VLENVal.getExtValue();
10208         StringRef MangledName = Fn->getName();
10209         if (CGM.getTarget().hasFeature("sve"))
10210           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10211                                          MangledName, 's', 128, Fn, ExprLoc);
10212         if (CGM.getTarget().hasFeature("neon"))
10213           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10214                                          MangledName, 'n', 128, Fn, ExprLoc);
10215       }
10216     }
10217     FD = FD->getPreviousDecl();
10218   }
10219 }
10220 
10221 namespace {
10222 /// Cleanup action for doacross support.
10223 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
10224 public:
10225   static const int DoacrossFinArgs = 2;
10226 
10227 private:
10228   llvm::FunctionCallee RTLFn;
10229   llvm::Value *Args[DoacrossFinArgs];
10230 
10231 public:
10232   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
10233                     ArrayRef<llvm::Value *> CallArgs)
10234       : RTLFn(RTLFn) {
10235     assert(CallArgs.size() == DoacrossFinArgs);
10236     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10237   }
10238   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
10239     if (!CGF.HaveInsertPoint())
10240       return;
10241     CGF.EmitRuntimeCall(RTLFn, Args);
10242   }
10243 };
10244 } // namespace
10245 
// Initializes doacross loop support: builds an on-stack array of kmp_dim
// descriptors (one per item in \p NumIterations), calls
// __kmpc_doacross_init, and pushes a cleanup that emits
// __kmpc_doacross_fini on both normal and EH exits.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // kmp_dim is built lazily once and cached in KmpDimTy for reuse.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, ArrayType::Normal, 0);

  // Zero-init the array, so each dim's 'lo' field stays 0.
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal =
        CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]),
                                 D.getNumIterations()->getType(), Int64Ty,
                                 D.getNumIterations()->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn =
      createRuntimeFunction(OMPRTL__kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Defer __kmpc_doacross_fini to scope exit so the region is always closed.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn =
      createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
10317 
10318 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
10319                                           const OMPDependClause *C) {
10320   QualType Int64Ty =
10321       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
10322   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
10323   QualType ArrayTy = CGM.getContext().getConstantArrayType(
10324       Int64Ty, Size, ArrayType::Normal, 0);
10325   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
10326   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
10327     const Expr *CounterVal = C->getLoopData(I);
10328     assert(CounterVal);
10329     llvm::Value *CntVal = CGF.EmitScalarConversion(
10330         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
10331         CounterVal->getExprLoc());
10332     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
10333                           /*Volatile=*/false, Int64Ty);
10334   }
10335   llvm::Value *Args[] = {
10336       emitUpdateLocation(CGF, C->getBeginLoc()),
10337       getThreadID(CGF, C->getBeginLoc()),
10338       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
10339   llvm::FunctionCallee RTLFn;
10340   if (C->getDependencyKind() == OMPC_DEPEND_source) {
10341     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
10342   } else {
10343     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
10344     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
10345   }
10346   CGF.EmitRuntimeCall(RTLFn, Args);
10347 }
10348 
10349 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
10350                                llvm::FunctionCallee Callee,
10351                                ArrayRef<llvm::Value *> Args) const {
10352   assert(Loc.isValid() && "Outlined function call location must be valid.");
10353   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
10354 
10355   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
10356     if (Fn->doesNotThrow()) {
10357       CGF.EmitNounwindRuntimeCall(Fn, Args);
10358       return;
10359     }
10360   }
10361   CGF.EmitRuntimeCall(Callee, Args);
10362 }
10363 
// Default lowering of a call to an outlined region function: no parameter
// translation is needed here, so simply forward to emitCall.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
10369 
10370 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
10371   if (const auto *FD = dyn_cast<FunctionDecl>(D))
10372     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
10373       HasEmittedDeclareTargetRegion = true;
10374 }
10375 
// Default implementation: native and target parameters coincide, so the
// address of the native parameter's local copy is returned unchanged.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
10381 
10382 namespace {
10383 /// Cleanup action for allocate support.
10384 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
10385 public:
10386   static const int CleanupArgs = 3;
10387 
10388 private:
10389   llvm::FunctionCallee RTLFn;
10390   llvm::Value *Args[CleanupArgs];
10391 
10392 public:
10393   OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
10394                        ArrayRef<llvm::Value *> CallArgs)
10395       : RTLFn(RTLFn) {
10396     assert(CallArgs.size() == CleanupArgs &&
10397            "Size of arguments does not match.");
10398     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10399   }
10400   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
10401     if (!CGF.HaveInsertPoint())
10402       return;
10403     CGF.EmitRuntimeCall(RTLFn, Args);
10404   }
10405 };
10406 } // namespace
10407 
// Returns the address for a local variable marked with
// '#pragma omp allocate': memory is obtained via __kmpc_alloc with the
// requested allocator, and a matching __kmpc_free cleanup is pushed.
// Returns an invalid Address when default (stack) allocation applies.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return Address::invalid();
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
      !AA->getAllocator())
    return Address::invalid();
  llvm::Value *Size;
  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
  if (CVD->getType()->isVariablyModifiedType()) {
    // Variably-modified type: the size is only known at run time.
    Size = CGF.getTypeSize(CVD->getType());
    // Align the size: ((size + align - 1) / align) * align
    Size = CGF.Builder.CreateNUWAdd(
        Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
  } else {
    // Constant-size type: round up to the declared alignment at compile time.
    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
    Size = CGM.getSize(Sz.alignTo(Align));
  }
  llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
  assert(AA->getAllocator() &&
         "Expected allocator expression for non-default allocator.");
  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
  // According to the standard, the original allocator type is a enum (integer).
  // Convert to pointer type, if required.
  if (Allocator->getType()->isIntegerTy())
    Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
  else if (Allocator->getType()->isPointerTy())
    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
                                                                CGM.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args,
                          CVD->getName() + ".void.addr");
  // Schedule __kmpc_free for scope exit (normal and EH paths).
  llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
                                                              Allocator};
  llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);

  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                                llvm::makeArrayRef(FiniArgs));
  // Cast the raw pointer returned by __kmpc_alloc to the variable's type.
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr,
      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
      CVD->getName() + ".addr");
  return Address(Addr, Align);
}
10461 
// CGOpenMPSIMDRuntime entry points that would require the OpenMP runtime
// library are unreachable in SIMD-only mode and trap with llvm_unreachable.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10521 
// Worksharing-loop and threadprivate entry points: none of these can occur
// in SIMD-only mode, so each traps with llvm_unreachable.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             OpenMPProcBindClauseKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10601 
// Task and reduction entry points for SIMD-only mode; only simple reductions
// are supported, everything else traps with llvm_unreachable.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// Simple (runtime-free) reductions are the only form reaching here; defer to
// the base implementation.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10657 
// Cancellation and target/offloading entry points for SIMD-only mode: no
// offloading happens, so target queries answer trivially and everything
// else traps with llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &D,
                                         llvm::Function *OutlinedFn,
                                         llvm::Value *OutlinedFnID,
                                         const Expr *IfCond,
                                         const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// No global requires target-specific handling in SIMD-only mode.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

// No offload entries exist, so no registration function is needed.
llvm::Function *CGOpenMPSIMDRuntime::emitRegistrationFunction() {
  return nullptr;
}
10706 
// Teams, target-data, and doacross entry points: unreachable in SIMD-only
// mode, each traps with llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10757