//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//
12 
13 #include "CGCXXABI.h"
14 #include "CGCleanup.h"
15 #include "CGOpenMPRuntime.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/CodeGen/ConstantInitBuilder.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "clang/Basic/BitmaskEnum.h"
22 #include "llvm/ADT/ArrayRef.h"
23 #include "llvm/Bitcode/BitcodeReader.h"
24 #include "llvm/IR/DerivedTypes.h"
25 #include "llvm/IR/GlobalValue.h"
26 #include "llvm/IR/Value.h"
27 #include "llvm/Support/Format.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include <cassert>
30 
31 using namespace clang;
32 using namespace CodeGen;
33 
34 namespace {
35 /// Base class for handling code generation inside OpenMP regions.
36 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
37 public:
38   /// Kinds of OpenMP regions used in codegen.
39   enum CGOpenMPRegionKind {
40     /// Region with outlined function for standalone 'parallel'
41     /// directive.
42     ParallelOutlinedRegion,
43     /// Region with outlined function for standalone 'task' directive.
44     TaskOutlinedRegion,
45     /// Region for constructs that do not require function outlining,
46     /// like 'for', 'sections', 'atomic' etc. directives.
47     InlinedRegion,
48     /// Region with outlined function for standalone 'target' directive.
49     TargetRegion,
50   };
51 
52   CGOpenMPRegionInfo(const CapturedStmt &CS,
53                      const CGOpenMPRegionKind RegionKind,
54                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
55                      bool HasCancel)
56       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
57         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
58 
59   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
60                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
61                      bool HasCancel)
62       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
63         Kind(Kind), HasCancel(HasCancel) {}
64 
65   /// Get a variable or parameter for storing global thread id
66   /// inside OpenMP construct.
67   virtual const VarDecl *getThreadIDVariable() const = 0;
68 
69   /// Emit the captured statement body.
70   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
71 
72   /// Get an LValue for the current ThreadID variable.
73   /// \return LValue for thread id variable. This LValue always has type int32*.
74   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
75 
76   virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
77 
78   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
79 
80   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
81 
82   bool hasCancel() const { return HasCancel; }
83 
84   static bool classof(const CGCapturedStmtInfo *Info) {
85     return Info->getKind() == CR_OpenMP;
86   }
87 
88   ~CGOpenMPRegionInfo() override = default;
89 
90 protected:
91   CGOpenMPRegionKind RegionKind;
92   RegionCodeGenTy CodeGen;
93   OpenMPDirectiveKind Kind;
94   bool HasCancel;
95 };
96 
97 /// API for captured statement code generation in OpenMP constructs.
98 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
99 public:
100   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
101                              const RegionCodeGenTy &CodeGen,
102                              OpenMPDirectiveKind Kind, bool HasCancel,
103                              StringRef HelperName)
104       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
105                            HasCancel),
106         ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
107     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
108   }
109 
110   /// Get a variable or parameter for storing global thread id
111   /// inside OpenMP construct.
112   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
113 
114   /// Get the name of the capture helper.
115   StringRef getHelperName() const override { return HelperName; }
116 
117   static bool classof(const CGCapturedStmtInfo *Info) {
118     return CGOpenMPRegionInfo::classof(Info) &&
119            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
120                ParallelOutlinedRegion;
121   }
122 
123 private:
124   /// A variable or parameter storing global thread id for OpenMP
125   /// constructs.
126   const VarDecl *ThreadIDVar;
127   StringRef HelperName;
128 };
129 
130 /// API for captured statement code generation in OpenMP constructs.
131 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
132 public:
133   class UntiedTaskActionTy final : public PrePostActionTy {
134     bool Untied;
135     const VarDecl *PartIDVar;
136     const RegionCodeGenTy UntiedCodeGen;
137     llvm::SwitchInst *UntiedSwitch = nullptr;
138 
139   public:
140     UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
141                        const RegionCodeGenTy &UntiedCodeGen)
142         : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
143     void Enter(CodeGenFunction &CGF) override {
144       if (Untied) {
145         // Emit task switching point.
146         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
147             CGF.GetAddrOfLocalVar(PartIDVar),
148             PartIDVar->getType()->castAs<PointerType>());
149         llvm::Value *Res =
150             CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
151         llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
152         UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
153         CGF.EmitBlock(DoneBB);
154         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
155         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
156         UntiedSwitch->addCase(CGF.Builder.getInt32(0),
157                               CGF.Builder.GetInsertBlock());
158         emitUntiedSwitch(CGF);
159       }
160     }
161     void emitUntiedSwitch(CodeGenFunction &CGF) const {
162       if (Untied) {
163         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
164             CGF.GetAddrOfLocalVar(PartIDVar),
165             PartIDVar->getType()->castAs<PointerType>());
166         CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
167                               PartIdLVal);
168         UntiedCodeGen(CGF);
169         CodeGenFunction::JumpDest CurPoint =
170             CGF.getJumpDestInCurrentScope(".untied.next.");
171         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
172         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
173         UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
174                               CGF.Builder.GetInsertBlock());
175         CGF.EmitBranchThroughCleanup(CurPoint);
176         CGF.EmitBlock(CurPoint.getBlock());
177       }
178     }
179     unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
180   };
181   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
182                                  const VarDecl *ThreadIDVar,
183                                  const RegionCodeGenTy &CodeGen,
184                                  OpenMPDirectiveKind Kind, bool HasCancel,
185                                  const UntiedTaskActionTy &Action)
186       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
187         ThreadIDVar(ThreadIDVar), Action(Action) {
188     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
189   }
190 
191   /// Get a variable or parameter for storing global thread id
192   /// inside OpenMP construct.
193   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
194 
195   /// Get an LValue for the current ThreadID variable.
196   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
197 
198   /// Get the name of the capture helper.
199   StringRef getHelperName() const override { return ".omp_outlined."; }
200 
201   void emitUntiedSwitch(CodeGenFunction &CGF) override {
202     Action.emitUntiedSwitch(CGF);
203   }
204 
205   static bool classof(const CGCapturedStmtInfo *Info) {
206     return CGOpenMPRegionInfo::classof(Info) &&
207            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
208                TaskOutlinedRegion;
209   }
210 
211 private:
212   /// A variable or parameter storing global thread id for OpenMP
213   /// constructs.
214   const VarDecl *ThreadIDVar;
215   /// Action for emitting code for untied tasks.
216   const UntiedTaskActionTy &Action;
217 };
218 
219 /// API for inlined captured statement code generation in OpenMP
220 /// constructs.
221 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
222 public:
223   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
224                             const RegionCodeGenTy &CodeGen,
225                             OpenMPDirectiveKind Kind, bool HasCancel)
226       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
227         OldCSI(OldCSI),
228         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
229 
230   // Retrieve the value of the context parameter.
231   llvm::Value *getContextValue() const override {
232     if (OuterRegionInfo)
233       return OuterRegionInfo->getContextValue();
234     llvm_unreachable("No context value for inlined OpenMP region");
235   }
236 
237   void setContextValue(llvm::Value *V) override {
238     if (OuterRegionInfo) {
239       OuterRegionInfo->setContextValue(V);
240       return;
241     }
242     llvm_unreachable("No context value for inlined OpenMP region");
243   }
244 
245   /// Lookup the captured field decl for a variable.
246   const FieldDecl *lookup(const VarDecl *VD) const override {
247     if (OuterRegionInfo)
248       return OuterRegionInfo->lookup(VD);
249     // If there is no outer outlined region,no need to lookup in a list of
250     // captured variables, we can use the original one.
251     return nullptr;
252   }
253 
254   FieldDecl *getThisFieldDecl() const override {
255     if (OuterRegionInfo)
256       return OuterRegionInfo->getThisFieldDecl();
257     return nullptr;
258   }
259 
260   /// Get a variable or parameter for storing global thread id
261   /// inside OpenMP construct.
262   const VarDecl *getThreadIDVariable() const override {
263     if (OuterRegionInfo)
264       return OuterRegionInfo->getThreadIDVariable();
265     return nullptr;
266   }
267 
268   /// Get an LValue for the current ThreadID variable.
269   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
270     if (OuterRegionInfo)
271       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
272     llvm_unreachable("No LValue for inlined OpenMP construct");
273   }
274 
275   /// Get the name of the capture helper.
276   StringRef getHelperName() const override {
277     if (auto *OuterRegionInfo = getOldCSI())
278       return OuterRegionInfo->getHelperName();
279     llvm_unreachable("No helper name for inlined OpenMP construct");
280   }
281 
282   void emitUntiedSwitch(CodeGenFunction &CGF) override {
283     if (OuterRegionInfo)
284       OuterRegionInfo->emitUntiedSwitch(CGF);
285   }
286 
287   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
288 
289   static bool classof(const CGCapturedStmtInfo *Info) {
290     return CGOpenMPRegionInfo::classof(Info) &&
291            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
292   }
293 
294   ~CGOpenMPInlinedRegionInfo() override = default;
295 
296 private:
297   /// CodeGen info about outer OpenMP region.
298   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
299   CGOpenMPRegionInfo *OuterRegionInfo;
300 };
301 
302 /// API for captured statement code generation in OpenMP target
303 /// constructs. For this captures, implicit parameters are used instead of the
304 /// captured fields. The name of the target region has to be unique in a given
305 /// application so it is provided by the client, because only the client has
306 /// the information to generate that.
307 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
308 public:
309   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
310                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
311       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
312                            /*HasCancel=*/false),
313         HelperName(HelperName) {}
314 
315   /// This is unused for target regions because each starts executing
316   /// with a single thread.
317   const VarDecl *getThreadIDVariable() const override { return nullptr; }
318 
319   /// Get the name of the capture helper.
320   StringRef getHelperName() const override { return HelperName; }
321 
322   static bool classof(const CGCapturedStmtInfo *Info) {
323     return CGOpenMPRegionInfo::classof(Info) &&
324            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
325   }
326 
327 private:
328   StringRef HelperName;
329 };
330 
331 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
332   llvm_unreachable("No codegen for expressions");
333 }
334 /// API for generation of expressions captured in a innermost OpenMP
335 /// region.
336 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
337 public:
338   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
339       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
340                                   OMPD_unknown,
341                                   /*HasCancel=*/false),
342         PrivScope(CGF) {
343     // Make sure the globals captured in the provided statement are local by
344     // using the privatization logic. We assume the same variable is not
345     // captured more than once.
346     for (const auto &C : CS.captures()) {
347       if (!C.capturesVariable() && !C.capturesVariableByCopy())
348         continue;
349 
350       const VarDecl *VD = C.getCapturedVar();
351       if (VD->isLocalVarDeclOrParm())
352         continue;
353 
354       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
355                       /*RefersToEnclosingVariableOrCapture=*/false,
356                       VD->getType().getNonReferenceType(), VK_LValue,
357                       C.getLocation());
358       PrivScope.addPrivate(
359           VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); });
360     }
361     (void)PrivScope.Privatize();
362   }
363 
364   /// Lookup the captured field decl for a variable.
365   const FieldDecl *lookup(const VarDecl *VD) const override {
366     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
367       return FD;
368     return nullptr;
369   }
370 
371   /// Emit the captured statement body.
372   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
373     llvm_unreachable("No body for expressions");
374   }
375 
376   /// Get a variable or parameter for storing global thread id
377   /// inside OpenMP construct.
378   const VarDecl *getThreadIDVariable() const override {
379     llvm_unreachable("No thread id for expressions");
380   }
381 
382   /// Get the name of the capture helper.
383   StringRef getHelperName() const override {
384     llvm_unreachable("No helper name for expressions");
385   }
386 
387   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
388 
389 private:
390   /// Private scope to capture global variables.
391   CodeGenFunction::OMPPrivateScope PrivScope;
392 };
393 
394 /// RAII for emitting code of OpenMP constructs.
395 class InlinedOpenMPRegionRAII {
396   CodeGenFunction &CGF;
397   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
398   FieldDecl *LambdaThisCaptureField = nullptr;
399   const CodeGen::CGBlockInfo *BlockInfo = nullptr;
400 
401 public:
402   /// Constructs region for combined constructs.
403   /// \param CodeGen Code generation sequence for combined directives. Includes
404   /// a list of functions used for code generation of implicitly inlined
405   /// regions.
406   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
407                           OpenMPDirectiveKind Kind, bool HasCancel)
408       : CGF(CGF) {
409     // Start emission for the construct.
410     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
411         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
412     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
413     LambdaThisCaptureField = CGF.LambdaThisCaptureField;
414     CGF.LambdaThisCaptureField = nullptr;
415     BlockInfo = CGF.BlockInfo;
416     CGF.BlockInfo = nullptr;
417   }
418 
419   ~InlinedOpenMPRegionRAII() {
420     // Restore original CapturedStmtInfo only if we're done with code emission.
421     auto *OldCSI =
422         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
423     delete CGF.CapturedStmtInfo;
424     CGF.CapturedStmtInfo = OldCSI;
425     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
426     CGF.LambdaThisCaptureField = LambdaThisCaptureField;
427     CGF.BlockInfo = BlockInfo;
428   }
429 };
430 
431 /// Values for bit flags used in the ident_t to describe the fields.
432 /// All enumeric elements are named and described in accordance with the code
433 /// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
434 enum OpenMPLocationFlags : unsigned {
435   /// Use trampoline for internal microtask.
436   OMP_IDENT_IMD = 0x01,
437   /// Use c-style ident structure.
438   OMP_IDENT_KMPC = 0x02,
439   /// Atomic reduction option for kmpc_reduce.
440   OMP_ATOMIC_REDUCE = 0x10,
441   /// Explicit 'barrier' directive.
442   OMP_IDENT_BARRIER_EXPL = 0x20,
443   /// Implicit barrier in code.
444   OMP_IDENT_BARRIER_IMPL = 0x40,
445   /// Implicit barrier in 'for' directive.
446   OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
447   /// Implicit barrier in 'sections' directive.
448   OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
449   /// Implicit barrier in 'single' directive.
450   OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
451   /// Call of __kmp_for_static_init for static loop.
452   OMP_IDENT_WORK_LOOP = 0x200,
453   /// Call of __kmp_for_static_init for sections.
454   OMP_IDENT_WORK_SECTIONS = 0x400,
455   /// Call of __kmp_for_static_init for distribute.
456   OMP_IDENT_WORK_DISTRIBUTE = 0x800,
457   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
458 };
459 
460 namespace {
461 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
462 /// Values for bit flags for marking which requires clauses have been used.
463 enum OpenMPOffloadingRequiresDirFlags : int64_t {
464   /// flag undefined.
465   OMP_REQ_UNDEFINED               = 0x000,
466   /// no requires clause present.
467   OMP_REQ_NONE                    = 0x001,
468   /// reverse_offload clause.
469   OMP_REQ_REVERSE_OFFLOAD         = 0x002,
470   /// unified_address clause.
471   OMP_REQ_UNIFIED_ADDRESS         = 0x004,
472   /// unified_shared_memory clause.
473   OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
474   /// dynamic_allocators clause.
475   OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
476   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
477 };
478 
479 enum OpenMPOffloadingReservedDeviceIDs {
480   /// Device ID if the device was not defined, runtime should get it
481   /// from environment variables in the spec.
482   OMP_DEVICEID_UNDEF = -1,
483 };
484 } // anonymous namespace
485 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
///
/// The enumerators below are listed in the same order as the fields of the
/// structure above, so each one has the zero-based position of its field.
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
526 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
///
/// The numeric values are fixed explicitly and must stay as written. The two
/// modifier enumerators are single high bits, distinct from every schedule
/// value listed here.
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  /// Default schedule: alias of OMP_sch_static.
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
558 
559 enum OpenMPRTLFunction {
560   /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
561   /// kmpc_micro microtask, ...);
562   OMPRTL__kmpc_fork_call,
563   /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
564   /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
565   OMPRTL__kmpc_threadprivate_cached,
566   /// Call to void __kmpc_threadprivate_register( ident_t *,
567   /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
568   OMPRTL__kmpc_threadprivate_register,
569   // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc);
570   OMPRTL__kmpc_global_thread_num,
571   // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
572   // kmp_critical_name *crit);
573   OMPRTL__kmpc_critical,
574   // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
575   // global_tid, kmp_critical_name *crit, uintptr_t hint);
576   OMPRTL__kmpc_critical_with_hint,
577   // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
578   // kmp_critical_name *crit);
579   OMPRTL__kmpc_end_critical,
580   // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
581   // global_tid);
582   OMPRTL__kmpc_cancel_barrier,
583   // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
584   OMPRTL__kmpc_barrier,
585   // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
586   OMPRTL__kmpc_for_static_fini,
587   // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
588   // global_tid);
589   OMPRTL__kmpc_serialized_parallel,
590   // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
591   // global_tid);
592   OMPRTL__kmpc_end_serialized_parallel,
593   // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
594   // kmp_int32 num_threads);
595   OMPRTL__kmpc_push_num_threads,
596   // Call to void __kmpc_flush(ident_t *loc);
597   OMPRTL__kmpc_flush,
598   // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
599   OMPRTL__kmpc_master,
600   // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
601   OMPRTL__kmpc_end_master,
602   // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
603   // int end_part);
604   OMPRTL__kmpc_omp_taskyield,
605   // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
606   OMPRTL__kmpc_single,
607   // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
608   OMPRTL__kmpc_end_single,
609   // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
610   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
611   // kmp_routine_entry_t *task_entry);
612   OMPRTL__kmpc_omp_task_alloc,
613   // Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *,
614   // kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
615   // size_t sizeof_shareds, kmp_routine_entry_t *task_entry,
616   // kmp_int64 device_id);
617   OMPRTL__kmpc_omp_target_task_alloc,
618   // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
619   // new_task);
620   OMPRTL__kmpc_omp_task,
621   // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
622   // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
623   // kmp_int32 didit);
624   OMPRTL__kmpc_copyprivate,
625   // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
626   // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
627   // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
628   OMPRTL__kmpc_reduce,
629   // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
630   // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
631   // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
632   // *lck);
633   OMPRTL__kmpc_reduce_nowait,
634   // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
635   // kmp_critical_name *lck);
636   OMPRTL__kmpc_end_reduce,
637   // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
638   // kmp_critical_name *lck);
639   OMPRTL__kmpc_end_reduce_nowait,
640   // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
641   // kmp_task_t * new_task);
642   OMPRTL__kmpc_omp_task_begin_if0,
643   // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
644   // kmp_task_t * new_task);
645   OMPRTL__kmpc_omp_task_complete_if0,
646   // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
647   OMPRTL__kmpc_ordered,
648   // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
649   OMPRTL__kmpc_end_ordered,
650   // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
651   // global_tid);
652   OMPRTL__kmpc_omp_taskwait,
653   // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
654   OMPRTL__kmpc_taskgroup,
655   // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
656   OMPRTL__kmpc_end_taskgroup,
657   // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
658   // int proc_bind);
659   OMPRTL__kmpc_push_proc_bind,
660   // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
661   // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
662   // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
663   OMPRTL__kmpc_omp_task_with_deps,
664   // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
665   // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
666   // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
667   OMPRTL__kmpc_omp_wait_deps,
668   // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
669   // global_tid, kmp_int32 cncl_kind);
670   OMPRTL__kmpc_cancellationpoint,
671   // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
672   // kmp_int32 cncl_kind);
673   OMPRTL__kmpc_cancel,
674   // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
675   // kmp_int32 num_teams, kmp_int32 thread_limit);
676   OMPRTL__kmpc_push_num_teams,
677   // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
678   // microtask, ...);
679   OMPRTL__kmpc_fork_teams,
680   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
681   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
682   // sched, kmp_uint64 grainsize, void *task_dup);
683   OMPRTL__kmpc_taskloop,
684   // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
685   // num_dims, struct kmp_dim *dims);
686   OMPRTL__kmpc_doacross_init,
687   // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
688   OMPRTL__kmpc_doacross_fini,
689   // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
690   // *vec);
691   OMPRTL__kmpc_doacross_post,
692   // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
693   // *vec);
694   OMPRTL__kmpc_doacross_wait,
695   // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
696   // *data);
697   OMPRTL__kmpc_task_reduction_init,
698   // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
699   // *d);
700   OMPRTL__kmpc_task_reduction_get_th_data,
701   // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
702   OMPRTL__kmpc_alloc,
703   // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
704   OMPRTL__kmpc_free,
705 
706   //
707   // Offloading related calls
708   //
709   // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
710   // size);
711   OMPRTL__kmpc_push_target_tripcount,
712   // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
713   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
714   // *arg_types);
715   OMPRTL__tgt_target,
716   // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
717   // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
718   // *arg_types);
719   OMPRTL__tgt_target_nowait,
720   // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
721   // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
722   // *arg_types, int32_t num_teams, int32_t thread_limit);
723   OMPRTL__tgt_target_teams,
724   // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
725   // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
726   // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
727   OMPRTL__tgt_target_teams_nowait,
728   // Call to void __tgt_register_requires(int64_t flags);
729   OMPRTL__tgt_register_requires,
730   // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
731   OMPRTL__tgt_register_lib,
732   // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
733   OMPRTL__tgt_unregister_lib,
734   // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
735   // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
736   OMPRTL__tgt_target_data_begin,
737   // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
738   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
739   // *arg_types);
740   OMPRTL__tgt_target_data_begin_nowait,
  // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
743   OMPRTL__tgt_target_data_end,
744   // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
745   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
746   // *arg_types);
747   OMPRTL__tgt_target_data_end_nowait,
748   // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
749   // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
750   OMPRTL__tgt_target_data_update,
751   // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
752   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
753   // *arg_types);
754   OMPRTL__tgt_target_data_update_nowait,
755 };
756 
757 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
758 /// region.
759 class CleanupTy final : public EHScopeStack::Cleanup {
760   PrePostActionTy *Action;
761 
762 public:
763   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
764   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
765     if (!CGF.HaveInsertPoint())
766       return;
767     Action->Exit(CGF);
768   }
769 };
770 
771 } // anonymous namespace
772 
773 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
774   CodeGenFunction::RunCleanupsScope Scope(CGF);
775   if (PrePostAction) {
776     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
777     Callback(CodeGen, CGF, *PrePostAction);
778   } else {
779     PrePostActionTy Action;
780     Callback(CodeGen, CGF, Action);
781   }
782 }
783 
784 /// Check if the combiner is a call to UDR combiner and if it is so return the
785 /// UDR decl used for reduction.
786 static const OMPDeclareReductionDecl *
787 getReductionInit(const Expr *ReductionOp) {
788   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
789     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
790       if (const auto *DRE =
791               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
792         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
793           return DRD;
794   return nullptr;
795 }
796 
797 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
798                                              const OMPDeclareReductionDecl *DRD,
799                                              const Expr *InitOp,
800                                              Address Private, Address Original,
801                                              QualType Ty) {
802   if (DRD->getInitializer()) {
803     std::pair<llvm::Function *, llvm::Function *> Reduction =
804         CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
805     const auto *CE = cast<CallExpr>(InitOp);
806     const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
807     const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
808     const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
809     const auto *LHSDRE =
810         cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
811     const auto *RHSDRE =
812         cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
813     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
814     PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
815                             [=]() { return Private; });
816     PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
817                             [=]() { return Original; });
818     (void)PrivateScope.Privatize();
819     RValue Func = RValue::get(Reduction.second);
820     CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
821     CGF.EmitIgnoredExpr(InitOp);
822   } else {
823     llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
824     std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
825     auto *GV = new llvm::GlobalVariable(
826         CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
827         llvm::GlobalValue::PrivateLinkage, Init, Name);
828     LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
829     RValue InitRVal;
830     switch (CGF.getEvaluationKind(Ty)) {
831     case TEK_Scalar:
832       InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
833       break;
834     case TEK_Complex:
835       InitRVal =
836           RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
837       break;
838     case TEK_Aggregate:
839       InitRVal = RValue::getAggregate(LV.getAddress());
840       break;
841     }
842     OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
843     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
844     CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
845                          /*IsInitializer=*/false);
846   }
847 }
848 
/// Emit element-by-element initialization of an array.
///
/// Emits a loop ("omp.arrayinit.body" / "omp.arrayinit.done") that walks the
/// destination array and initializes each base element. When \p DRD is
/// non-null the source array is walked in lockstep with the destination.
/// \param CGF Code generation state used to emit the loop.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, every element is initialized via
/// emitInitWithReductionInitializer(); otherwise \p Init is emitted directly
/// into the element.
/// \param Init Initial expression of array.
/// \param DRD Declare reduction declaration; may be null, in which case
/// \p SrcAddr is not used.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  // emitArrayLength also reports the base element type through ElementTy and
  // updates DestAddr.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  // NOTE(review): this casts DestAddr to its own element type, which looks
  // like a no-op - confirm whether another element type was intended.
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  // The source array is only touched when a declare reduction is involved.
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Compute the one-past-the-end pointer of the destination array.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for an empty array.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  // EntryBB is remembered as the first incoming edge of the PHI nodes below.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI for the current source element; created only when DRD is set.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  // PHI for the current destination element.
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit the initialization of the current element inside its own cleanups
  // scope.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the source address forward by one element.
    // NOTE(review): the value name below says "dest" although it advances the
    // source pointer; it only affects the IR value name.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the destination address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // The back edge comes from whatever block the body finished in.
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
937 
938 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
939   return CGF.EmitOMPSharedLValue(E);
940 }
941 
942 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
943                                             const Expr *E) {
944   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
945     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
946   return LValue();
947 }
948 
949 void ReductionCodeGen::emitAggregateInitialization(
950     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
951     const OMPDeclareReductionDecl *DRD) {
952   // Emit VarDecl with copy init for arrays.
953   // Get the address of the original variable captured in current
954   // captured region.
955   const auto *PrivateVD =
956       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
957   bool EmitDeclareReductionInit =
958       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
959   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
960                        EmitDeclareReductionInit,
961                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
962                                                 : PrivateVD->getInit(),
963                        DRD, SharedLVal.getAddress());
964 }
965 
966 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
967                                    ArrayRef<const Expr *> Privates,
968                                    ArrayRef<const Expr *> ReductionOps) {
969   ClausesData.reserve(Shareds.size());
970   SharedAddresses.reserve(Shareds.size());
971   Sizes.reserve(Shareds.size());
972   BaseDecls.reserve(Shareds.size());
973   auto IPriv = Privates.begin();
974   auto IRed = ReductionOps.begin();
975   for (const Expr *Ref : Shareds) {
976     ClausesData.emplace_back(Ref, *IPriv, *IRed);
977     std::advance(IPriv, 1);
978     std::advance(IRed, 1);
979   }
980 }
981 
982 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
983   assert(SharedAddresses.size() == N &&
984          "Number of generated lvalues must be exactly N.");
985   LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
986   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
987   SharedAddresses.emplace_back(First, Second);
988 }
989 
990 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
991   const auto *PrivateVD =
992       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
993   QualType PrivateType = PrivateVD->getType();
994   bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
995   if (!PrivateType->isVariablyModifiedType()) {
996     Sizes.emplace_back(
997         CGF.getTypeSize(
998             SharedAddresses[N].first.getType().getNonReferenceType()),
999         nullptr);
1000     return;
1001   }
1002   llvm::Value *Size;
1003   llvm::Value *SizeInChars;
1004   auto *ElemType =
1005       cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType())
1006           ->getElementType();
1007   auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
1008   if (AsArraySection) {
1009     Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(),
1010                                      SharedAddresses[N].first.getPointer());
1011     Size = CGF.Builder.CreateNUWAdd(
1012         Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
1013     SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
1014   } else {
1015     SizeInChars = CGF.getTypeSize(
1016         SharedAddresses[N].first.getType().getNonReferenceType());
1017     Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
1018   }
1019   Sizes.emplace_back(SizeInChars, Size);
1020   CodeGenFunction::OpaqueValueMapping OpaqueMap(
1021       CGF,
1022       cast<OpaqueValueExpr>(
1023           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
1024       RValue::get(Size));
1025   CGF.EmitVariablyModifiedType(PrivateType);
1026 }
1027 
1028 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
1029                                          llvm::Value *Size) {
1030   const auto *PrivateVD =
1031       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1032   QualType PrivateType = PrivateVD->getType();
1033   if (!PrivateType->isVariablyModifiedType()) {
1034     assert(!Size && !Sizes[N].second &&
1035            "Size should be nullptr for non-variably modified reduction "
1036            "items.");
1037     return;
1038   }
1039   CodeGenFunction::OpaqueValueMapping OpaqueMap(
1040       CGF,
1041       cast<OpaqueValueExpr>(
1042           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
1043       RValue::get(Size));
1044   CGF.EmitVariablyModifiedType(PrivateType);
1045 }
1046 
1047 void ReductionCodeGen::emitInitialization(
1048     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
1049     llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
1050   assert(SharedAddresses.size() > N && "No variable was generated");
1051   const auto *PrivateVD =
1052       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1053   const OMPDeclareReductionDecl *DRD =
1054       getReductionInit(ClausesData[N].ReductionOp);
1055   QualType PrivateType = PrivateVD->getType();
1056   PrivateAddr = CGF.Builder.CreateElementBitCast(
1057       PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1058   QualType SharedType = SharedAddresses[N].first.getType();
1059   SharedLVal = CGF.MakeAddrLValue(
1060       CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
1061                                        CGF.ConvertTypeForMem(SharedType)),
1062       SharedType, SharedAddresses[N].first.getBaseInfo(),
1063       CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
1064   if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
1065     emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
1066   } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
1067     emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
1068                                      PrivateAddr, SharedLVal.getAddress(),
1069                                      SharedLVal.getType());
1070   } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
1071              !CGF.isTrivialInitializer(PrivateVD->getInit())) {
1072     CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
1073                          PrivateVD->getType().getQualifiers(),
1074                          /*IsInitializer=*/false);
1075   }
1076 }
1077 
1078 bool ReductionCodeGen::needCleanups(unsigned N) {
1079   const auto *PrivateVD =
1080       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1081   QualType PrivateType = PrivateVD->getType();
1082   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1083   return DTorKind != QualType::DK_none;
1084 }
1085 
1086 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
1087                                     Address PrivateAddr) {
1088   const auto *PrivateVD =
1089       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1090   QualType PrivateType = PrivateVD->getType();
1091   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1092   if (needCleanups(N)) {
1093     PrivateAddr = CGF.Builder.CreateElementBitCast(
1094         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1095     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
1096   }
1097 }
1098 
1099 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
1100                           LValue BaseLV) {
1101   BaseTy = BaseTy.getNonReferenceType();
1102   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1103          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
1104     if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
1105       BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
1106     } else {
1107       LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
1108       BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
1109     }
1110     BaseTy = BaseTy->getPointeeType();
1111   }
1112   return CGF.MakeAddrLValue(
1113       CGF.Builder.CreateElementBitCast(BaseLV.getAddress(),
1114                                        CGF.ConvertTypeForMem(ElTy)),
1115       BaseLV.getType(), BaseLV.getBaseInfo(),
1116       CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
1117 }
1118 
1119 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
1120                           llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
1121                           llvm::Value *Addr) {
1122   Address Tmp = Address::invalid();
1123   Address TopTmp = Address::invalid();
1124   Address MostTopTmp = Address::invalid();
1125   BaseTy = BaseTy.getNonReferenceType();
1126   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1127          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
1128     Tmp = CGF.CreateMemTemp(BaseTy);
1129     if (TopTmp.isValid())
1130       CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
1131     else
1132       MostTopTmp = Tmp;
1133     TopTmp = Tmp;
1134     BaseTy = BaseTy->getPointeeType();
1135   }
1136   llvm::Type *Ty = BaseLVType;
1137   if (Tmp.isValid())
1138     Ty = Tmp.getElementType();
1139   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
1140   if (Tmp.isValid()) {
1141     CGF.Builder.CreateStore(Addr, Tmp);
1142     return MostTopTmp;
1143   }
1144   return Address(Addr, BaseLVAlignment);
1145 }
1146 
1147 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
1148   const VarDecl *OrigVD = nullptr;
1149   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
1150     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
1151     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
1152       Base = TempOASE->getBase()->IgnoreParenImpCasts();
1153     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1154       Base = TempASE->getBase()->IgnoreParenImpCasts();
1155     DE = cast<DeclRefExpr>(Base);
1156     OrigVD = cast<VarDecl>(DE->getDecl());
1157   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
1158     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
1159     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1160       Base = TempASE->getBase()->IgnoreParenImpCasts();
1161     DE = cast<DeclRefExpr>(Base);
1162     OrigVD = cast<VarDecl>(DE->getDecl());
1163   }
1164   return OrigVD;
1165 }
1166 
1167 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
1168                                                Address PrivateAddr) {
1169   const DeclRefExpr *DE;
1170   if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
1171     BaseDecls.emplace_back(OrigVD);
1172     LValue OriginalBaseLValue = CGF.EmitLValue(DE);
1173     LValue BaseLValue =
1174         loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1175                     OriginalBaseLValue);
1176     llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1177         BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
1178     llvm::Value *PrivatePointer =
1179         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1180             PrivateAddr.getPointer(),
1181             SharedAddresses[N].first.getAddress().getType());
1182     llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
1183     return castToBase(CGF, OrigVD->getType(),
1184                       SharedAddresses[N].first.getType(),
1185                       OriginalBaseLValue.getAddress().getType(),
1186                       OriginalBaseLValue.getAlignment(), Ptr);
1187   }
1188   BaseDecls.emplace_back(
1189       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1190   return PrivateAddr;
1191 }
1192 
1193 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1194   const OMPDeclareReductionDecl *DRD =
1195       getReductionInit(ClausesData[N].ReductionOp);
1196   return DRD && DRD->getInitializer();
1197 }
1198 
1199 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1200   return CGF.EmitLoadOfPointerLValue(
1201       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1202       getThreadIDVariable()->getType()->castAs<PointerType>());
1203 }
1204 
1205 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
1206   if (!CGF.HaveInsertPoint())
1207     return;
1208   // 1.2.2 OpenMP Language Terminology
1209   // Structured block - An executable statement with a single entry at the
1210   // top and a single exit at the bottom.
1211   // The point of exit cannot be a branch out of the structured block.
1212   // longjmp() and throw() must not violate the entry/exit criteria.
1213   CGF.EHStack.pushTerminate();
1214   CodeGen(CGF);
1215   CGF.EHStack.popTerminate();
1216 }
1217 
1218 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1219     CodeGenFunction &CGF) {
1220   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1221                             getThreadIDVariable()->getType(),
1222                             AlignmentSource::Decl);
1223 }
1224 
1225 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1226                                        QualType FieldTy) {
1227   auto *Field = FieldDecl::Create(
1228       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1229       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1230       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1231   Field->setAccess(AS_public);
1232   DC->addDecl(Field);
1233   return Field;
1234 }
1235 
1236 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
1237                                  StringRef Separator)
1238     : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
1239       OffloadEntriesInfoManager(CGM) {
1240   ASTContext &C = CGM.getContext();
1241   RecordDecl *RD = C.buildImplicitRecord("ident_t");
1242   QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1243   RD->startDefinition();
1244   // reserved_1
1245   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1246   // flags
1247   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1248   // reserved_2
1249   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1250   // reserved_3
1251   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1252   // psource
1253   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
1254   RD->completeDefinition();
1255   IdentQTy = C.getRecordType(RD);
1256   IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
1257   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1258 
1259   loadOffloadInfoMetadata();
1260 }
1261 
1262 void CGOpenMPRuntime::clear() {
1263   InternalVars.clear();
1264   // Clean non-target variable declarations possibly used only in debug info.
1265   for (const auto &Data : EmittedNonTargetVariables) {
1266     if (!Data.getValue().pointsToAliveValue())
1267       continue;
1268     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1269     if (!GV)
1270       continue;
1271     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1272       continue;
1273     GV->eraseFromParent();
1274   }
1275 }
1276 
1277 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1278   SmallString<128> Buffer;
1279   llvm::raw_svector_ostream OS(Buffer);
1280   StringRef Sep = FirstSeparator;
1281   for (StringRef Part : Parts) {
1282     OS << Sep << Part;
1283     Sep = Separator;
1284   }
1285   return OS.str();
1286 }
1287 
/// Emit an internal helper function for a user-defined reduction: either its
/// combiner or its initializer.
/// \param CGM Module the function is emitted into.
/// \param Ty Type of the reduction; both parameters are restrict-qualified
/// pointers to it.
/// \param CombinerInitializer Expression emitted as the body of the function;
/// may be null, in which case no expression is emitted for it.
/// \param In Variable mapped to the pointee of the second ("in") parameter.
/// \param Out Variable mapped to the pointee of the first ("out") parameter.
/// \param IsCombiner True to emit a combiner, false to emit an initializer.
/// For initializers a non-trivial initializer of \p Out is emitted first.
/// \returns the emitted function.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *out, Ty *in);
  // (the "out" parameter is added to the argument list first, see below).
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // The name is built from "omp_combiner"/"omp_initializer" plus an empty
  // trailing part, joined by the runtime's separators.
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // When optimizing, make the helper always-inline instead of
  // noinline/optnone.
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  (void)Scope.Privatize();
  // For initializers, emit the non-trivial initializer of the Out variable
  // into its (remapped) storage before the initializer expression itself.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  // Run the scope's cleanups before the function is finished.
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1344 
1345 void CGOpenMPRuntime::emitUserDefinedReduction(
1346     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1347   if (UDRMap.count(D) > 0)
1348     return;
1349   llvm::Function *Combiner = emitCombinerOrInitializer(
1350       CGM, D->getType(), D->getCombiner(),
1351       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1352       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1353       /*IsCombiner=*/true);
1354   llvm::Function *Initializer = nullptr;
1355   if (const Expr *Init = D->getInitializer()) {
1356     Initializer = emitCombinerOrInitializer(
1357         CGM, D->getType(),
1358         D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1359                                                                      : nullptr,
1360         cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1361         cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1362         /*IsCombiner=*/false);
1363   }
1364   UDRMap.try_emplace(D, Combiner, Initializer);
1365   if (CGF) {
1366     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1367     Decls.second.push_back(D);
1368   }
1369 }
1370 
1371 std::pair<llvm::Function *, llvm::Function *>
1372 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1373   auto I = UDRMap.find(D);
1374   if (I != UDRMap.end())
1375     return I->second;
1376   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1377   return UDRMap.lookup(D);
1378 }
1379 
1380 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1381     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1382     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1383     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1384   assert(ThreadIDVar->getType()->isPointerType() &&
1385          "thread id variable must be of type kmp_int32 *");
1386   CodeGenFunction CGF(CGM, true);
1387   bool HasCancel = false;
1388   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1389     HasCancel = OPD->hasCancel();
1390   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1391     HasCancel = OPSD->hasCancel();
1392   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1393     HasCancel = OPFD->hasCancel();
1394   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1395     HasCancel = OPFD->hasCancel();
1396   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1397     HasCancel = OPFD->hasCancel();
1398   else if (const auto *OPFD =
1399                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1400     HasCancel = OPFD->hasCancel();
1401   else if (const auto *OPFD =
1402                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1403     HasCancel = OPFD->hasCancel();
1404   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1405                                     HasCancel, OutlinedHelperName);
1406   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1407   return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
1408 }
1409 
1410 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1411     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1412     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1413   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1414   return emitParallelOrTeamsOutlinedFunction(
1415       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1416 }
1417 
1418 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1419     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1420     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1421   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1422   return emitParallelOrTeamsOutlinedFunction(
1423       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1424 }
1425 
1426 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
1427     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1428     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1429     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1430     bool Tied, unsigned &NumberOfParts) {
1431   auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1432                                               PrePostActionTy &) {
1433     llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1434     llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1435     llvm::Value *TaskArgs[] = {
1436         UpLoc, ThreadID,
1437         CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1438                                     TaskTVar->getType()->castAs<PointerType>())
1439             .getPointer()};
1440     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
1441   };
1442   CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1443                                                             UntiedCodeGen);
1444   CodeGen.setAction(Action);
1445   assert(!ThreadIDVar->getType()->isPointerType() &&
1446          "thread id variable must be of type kmp_int32 for tasks");
1447   const OpenMPDirectiveKind Region =
1448       isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1449                                                       : OMPD_task;
1450   const CapturedStmt *CS = D.getCapturedStmt(Region);
1451   const auto *TD = dyn_cast<OMPTaskDirective>(&D);
1452   CodeGenFunction CGF(CGM, true);
1453   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1454                                         InnermostKind,
1455                                         TD ? TD->hasCancel() : false, Action);
1456   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1457   llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
1458   if (!Tied)
1459     NumberOfParts = Action.getNumberOfParts();
1460   return Res;
1461 }
1462 
1463 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1464                              const RecordDecl *RD, const CGRecordLayout &RL,
1465                              ArrayRef<llvm::Constant *> Data) {
1466   llvm::StructType *StructTy = RL.getLLVMType();
1467   unsigned PrevIdx = 0;
1468   ConstantInitBuilder CIBuilder(CGM);
1469   auto DI = Data.begin();
1470   for (const FieldDecl *FD : RD->fields()) {
1471     unsigned Idx = RL.getLLVMFieldNo(FD);
1472     // Fill the alignment.
1473     for (unsigned I = PrevIdx; I < Idx; ++I)
1474       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1475     PrevIdx = Idx + 1;
1476     Fields.add(*DI);
1477     ++DI;
1478   }
1479 }
1480 
1481 template <class... As>
1482 static llvm::GlobalVariable *
1483 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1484                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1485                    As &&... Args) {
1486   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1487   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1488   ConstantInitBuilder CIBuilder(CGM);
1489   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1490   buildStructValue(Fields, CGM, RD, RL, Data);
1491   return Fields.finishAndCreateGlobal(
1492       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1493       std::forward<As>(Args)...);
1494 }
1495 
1496 template <typename T>
1497 static void
1498 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1499                                          ArrayRef<llvm::Constant *> Data,
1500                                          T &Parent) {
1501   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1502   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1503   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1504   buildStructValue(Fields, CGM, RD, RL, Data);
1505   Fields.finishAndAddTo(Parent);
1506 }
1507 
1508 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
1509   CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
1510   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1511   FlagsTy FlagsKey(Flags, Reserved2Flags);
1512   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
1513   if (!Entry) {
1514     if (!DefaultOpenMPPSource) {
1515       // Initialize default location for psource field of ident_t structure of
1516       // all ident_t objects. Format is ";file;function;line;column;;".
1517       // Taken from
1518       // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
1519       DefaultOpenMPPSource =
1520           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
1521       DefaultOpenMPPSource =
1522           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
1523     }
1524 
1525     llvm::Constant *Data[] = {
1526         llvm::ConstantInt::getNullValue(CGM.Int32Ty),
1527         llvm::ConstantInt::get(CGM.Int32Ty, Flags),
1528         llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
1529         llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
1530     llvm::GlobalValue *DefaultOpenMPLocation =
1531         createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
1532                            llvm::GlobalValue::PrivateLinkage);
1533     DefaultOpenMPLocation->setUnnamedAddr(
1534         llvm::GlobalValue::UnnamedAddr::Global);
1535 
1536     OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
1537   }
1538   return Address(Entry, Align);
1539 }
1540 
1541 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1542                                              bool AtCurrentPoint) {
1543   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1544   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1545 
1546   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1547   if (AtCurrentPoint) {
1548     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1549         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1550   } else {
1551     Elem.second.ServiceInsertPt =
1552         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1553     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1554   }
1555 }
1556 
1557 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1558   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1559   if (Elem.second.ServiceInsertPt) {
1560     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1561     Elem.second.ServiceInsertPt = nullptr;
1562     Ptr->eraseFromParent();
1563   }
1564 }
1565 
1566 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1567                                                  SourceLocation Loc,
1568                                                  unsigned Flags) {
1569   Flags |= OMP_IDENT_KMPC;
1570   // If no debug info is generated - return global default location.
1571   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1572       Loc.isInvalid())
1573     return getOrCreateDefaultLocation(Flags).getPointer();
1574 
1575   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1576 
1577   CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
1578   Address LocValue = Address::invalid();
1579   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1580   if (I != OpenMPLocThreadIDMap.end())
1581     LocValue = Address(I->second.DebugLoc, Align);
1582 
1583   // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
1584   // GetOpenMPThreadID was called before this routine.
1585   if (!LocValue.isValid()) {
1586     // Generate "ident_t .kmpc_loc.addr;"
1587     Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
1588     auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1589     Elem.second.DebugLoc = AI.getPointer();
1590     LocValue = AI;
1591 
1592     if (!Elem.second.ServiceInsertPt)
1593       setLocThreadIdInsertPt(CGF);
1594     CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1595     CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1596     CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
1597                              CGF.getTypeSize(IdentQTy));
1598   }
1599 
1600   // char **psource = &.kmpc_loc_<flags>.addr.psource;
1601   LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
1602   auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
1603   LValue PSource =
1604       CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));
1605 
1606   llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
1607   if (OMPDebugLoc == nullptr) {
1608     SmallString<128> Buffer2;
1609     llvm::raw_svector_ostream OS2(Buffer2);
1610     // Build debug location
1611     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1612     OS2 << ";" << PLoc.getFilename() << ";";
1613     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1614       OS2 << FD->getQualifiedNameAsString();
1615     OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1616     OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
1617     OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
1618   }
1619   // *psource = ";<File>;<Function>;<Line>;<Column>;;";
1620   CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);
1621 
1622   // Our callers always pass this to a runtime function, so for
1623   // convenience, go ahead and return a naked pointer.
1624   return LocValue.getPointer();
1625 }
1626 
1627 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
1628                                           SourceLocation Loc) {
1629   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1630 
1631   llvm::Value *ThreadID = nullptr;
1632   // Check whether we've already cached a load of the thread id in this
1633   // function.
1634   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1635   if (I != OpenMPLocThreadIDMap.end()) {
1636     ThreadID = I->second.ThreadID;
1637     if (ThreadID != nullptr)
1638       return ThreadID;
1639   }
1640   // If exceptions are enabled, do not use parameter to avoid possible crash.
1641   if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1642       !CGF.getLangOpts().CXXExceptions ||
1643       CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
1644     if (auto *OMPRegionInfo =
1645             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1646       if (OMPRegionInfo->getThreadIDVariable()) {
1647         // Check if this an outlined function with thread id passed as argument.
1648         LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1649         ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1650         // If value loaded in entry block, cache it and use it everywhere in
1651         // function.
1652         if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
1653           auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1654           Elem.second.ThreadID = ThreadID;
1655         }
1656         return ThreadID;
1657       }
1658     }
1659   }
1660 
1661   // This is not an outlined function region - need to call __kmpc_int32
1662   // kmpc_global_thread_num(ident_t *loc).
1663   // Generate thread id value and cache this value for use across the
1664   // function.
1665   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1666   if (!Elem.second.ServiceInsertPt)
1667     setLocThreadIdInsertPt(CGF);
1668   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1669   CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1670   llvm::CallInst *Call = CGF.Builder.CreateCall(
1671       createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
1672       emitUpdateLocation(CGF, Loc));
1673   Call->setCallingConv(CGF.getRuntimeCC());
1674   Elem.second.ThreadID = Call;
1675   return Call;
1676 }
1677 
1678 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1679   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1680   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1681     clearLocThreadIdInsertPt(CGF);
1682     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1683   }
1684   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1685     for(auto *D : FunctionUDRMap[CGF.CurFn])
1686       UDRMap.erase(D);
1687     FunctionUDRMap.erase(CGF.CurFn);
1688   }
1689 }
1690 
1691 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1692   return IdentTy->getPointerTo();
1693 }
1694 
1695 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1696   if (!Kmpc_MicroTy) {
1697     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1698     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1699                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1700     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1701   }
1702   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1703 }
1704 
1705 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1706   llvm::FunctionCallee RTLFn = nullptr;
1707   switch (static_cast<OpenMPRTLFunction>(Function)) {
1708   case OMPRTL__kmpc_fork_call: {
1709     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1710     // microtask, ...);
1711     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1712                                 getKmpc_MicroPointerTy()};
1713     auto *FnTy =
1714         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1715     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1716     if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
1717       if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
1718         llvm::LLVMContext &Ctx = F->getContext();
1719         llvm::MDBuilder MDB(Ctx);
1720         // Annotate the callback behavior of the __kmpc_fork_call:
1721         //  - The callback callee is argument number 2 (microtask).
1722         //  - The first two arguments of the callback callee are unknown (-1).
1723         //  - All variadic arguments to the __kmpc_fork_call are passed to the
1724         //    callback callee.
1725         F->addMetadata(
1726             llvm::LLVMContext::MD_callback,
1727             *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
1728                                         2, {-1, -1},
1729                                         /* VarArgsArePassed */ true)}));
1730       }
1731     }
1732     break;
1733   }
1734   case OMPRTL__kmpc_global_thread_num: {
1735     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1736     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1737     auto *FnTy =
1738         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1739     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1740     break;
1741   }
1742   case OMPRTL__kmpc_threadprivate_cached: {
1743     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1744     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1745     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1746                                 CGM.VoidPtrTy, CGM.SizeTy,
1747                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1748     auto *FnTy =
1749         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1750     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1751     break;
1752   }
1753   case OMPRTL__kmpc_critical: {
1754     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1755     // kmp_critical_name *crit);
1756     llvm::Type *TypeParams[] = {
1757         getIdentTyPointerTy(), CGM.Int32Ty,
1758         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1759     auto *FnTy =
1760         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1761     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1762     break;
1763   }
1764   case OMPRTL__kmpc_critical_with_hint: {
1765     // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1766     // kmp_critical_name *crit, uintptr_t hint);
1767     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1768                                 llvm::PointerType::getUnqual(KmpCriticalNameTy),
1769                                 CGM.IntPtrTy};
1770     auto *FnTy =
1771         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1772     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1773     break;
1774   }
1775   case OMPRTL__kmpc_threadprivate_register: {
1776     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1777     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1778     // typedef void *(*kmpc_ctor)(void *);
1779     auto *KmpcCtorTy =
1780         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1781                                 /*isVarArg*/ false)->getPointerTo();
1782     // typedef void *(*kmpc_cctor)(void *, void *);
1783     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1784     auto *KmpcCopyCtorTy =
1785         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1786                                 /*isVarArg*/ false)
1787             ->getPointerTo();
1788     // typedef void (*kmpc_dtor)(void *);
1789     auto *KmpcDtorTy =
1790         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1791             ->getPointerTo();
1792     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1793                               KmpcCopyCtorTy, KmpcDtorTy};
1794     auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1795                                         /*isVarArg*/ false);
1796     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1797     break;
1798   }
1799   case OMPRTL__kmpc_end_critical: {
1800     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1801     // kmp_critical_name *crit);
1802     llvm::Type *TypeParams[] = {
1803         getIdentTyPointerTy(), CGM.Int32Ty,
1804         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1805     auto *FnTy =
1806         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1807     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1808     break;
1809   }
1810   case OMPRTL__kmpc_cancel_barrier: {
1811     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1812     // global_tid);
1813     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1814     auto *FnTy =
1815         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1816     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1817     break;
1818   }
1819   case OMPRTL__kmpc_barrier: {
1820     // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1821     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1822     auto *FnTy =
1823         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1824     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1825     break;
1826   }
1827   case OMPRTL__kmpc_for_static_fini: {
1828     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1829     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1830     auto *FnTy =
1831         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1832     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1833     break;
1834   }
1835   case OMPRTL__kmpc_push_num_threads: {
1836     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1837     // kmp_int32 num_threads)
1838     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1839                                 CGM.Int32Ty};
1840     auto *FnTy =
1841         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1842     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1843     break;
1844   }
1845   case OMPRTL__kmpc_serialized_parallel: {
1846     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1847     // global_tid);
1848     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1849     auto *FnTy =
1850         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1851     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1852     break;
1853   }
1854   case OMPRTL__kmpc_end_serialized_parallel: {
1855     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1856     // global_tid);
1857     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1858     auto *FnTy =
1859         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1860     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1861     break;
1862   }
1863   case OMPRTL__kmpc_flush: {
1864     // Build void __kmpc_flush(ident_t *loc);
1865     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1866     auto *FnTy =
1867         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1868     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1869     break;
1870   }
1871   case OMPRTL__kmpc_master: {
1872     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1873     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1874     auto *FnTy =
1875         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1876     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
1877     break;
1878   }
1879   case OMPRTL__kmpc_end_master: {
1880     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
1881     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1882     auto *FnTy =
1883         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1884     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
1885     break;
1886   }
1887   case OMPRTL__kmpc_omp_taskyield: {
1888     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
1889     // int end_part);
1890     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1891     auto *FnTy =
1892         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1893     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
1894     break;
1895   }
1896   case OMPRTL__kmpc_single: {
1897     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
1898     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1899     auto *FnTy =
1900         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1901     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
1902     break;
1903   }
1904   case OMPRTL__kmpc_end_single: {
1905     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
1906     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1907     auto *FnTy =
1908         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1909     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
1910     break;
1911   }
1912   case OMPRTL__kmpc_omp_task_alloc: {
1913     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
1914     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1915     // kmp_routine_entry_t *task_entry);
1916     assert(KmpRoutineEntryPtrTy != nullptr &&
1917            "Type kmp_routine_entry_t must be created.");
1918     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1919                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
1920     // Return void * and then cast to particular kmp_task_t type.
1921     auto *FnTy =
1922         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1923     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
1924     break;
1925   }
1926   case OMPRTL__kmpc_omp_target_task_alloc: {
1927     // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32 gtid,
1928     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1929     // kmp_routine_entry_t *task_entry, kmp_int64 device_id);
1930     assert(KmpRoutineEntryPtrTy != nullptr &&
1931            "Type kmp_routine_entry_t must be created.");
1932     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1933                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy,
1934                                 CGM.Int64Ty};
1935     // Return void * and then cast to particular kmp_task_t type.
1936     auto *FnTy =
1937         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1938     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_target_task_alloc");
1939     break;
1940   }
1941   case OMPRTL__kmpc_omp_task: {
1942     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1943     // *new_task);
1944     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1945                                 CGM.VoidPtrTy};
1946     auto *FnTy =
1947         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1948     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
1949     break;
1950   }
1951   case OMPRTL__kmpc_copyprivate: {
1952     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
1953     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
1954     // kmp_int32 didit);
1955     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1956     auto *CpyFnTy =
1957         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
1958     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
1959                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
1960                                 CGM.Int32Ty};
1961     auto *FnTy =
1962         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1963     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
1964     break;
1965   }
1966   case OMPRTL__kmpc_reduce: {
1967     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
1968     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
1969     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
1970     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1971     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1972                                                /*isVarArg=*/false);
1973     llvm::Type *TypeParams[] = {
1974         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1975         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1976         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1977     auto *FnTy =
1978         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1979     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
1980     break;
1981   }
1982   case OMPRTL__kmpc_reduce_nowait: {
1983     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
1984     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
1985     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
1986     // *lck);
1987     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1988     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1989                                                /*isVarArg=*/false);
1990     llvm::Type *TypeParams[] = {
1991         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1992         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1993         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1994     auto *FnTy =
1995         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1996     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
1997     break;
1998   }
1999   case OMPRTL__kmpc_end_reduce: {
2000     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
2001     // kmp_critical_name *lck);
2002     llvm::Type *TypeParams[] = {
2003         getIdentTyPointerTy(), CGM.Int32Ty,
2004         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2005     auto *FnTy =
2006         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2007     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
2008     break;
2009   }
2010   case OMPRTL__kmpc_end_reduce_nowait: {
2011     // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
2012     // kmp_critical_name *lck);
2013     llvm::Type *TypeParams[] = {
2014         getIdentTyPointerTy(), CGM.Int32Ty,
2015         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2016     auto *FnTy =
2017         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2018     RTLFn =
2019         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
2020     break;
2021   }
2022   case OMPRTL__kmpc_omp_task_begin_if0: {
2023     // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2024     // *new_task);
2025     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2026                                 CGM.VoidPtrTy};
2027     auto *FnTy =
2028         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2029     RTLFn =
2030         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
2031     break;
2032   }
2033   case OMPRTL__kmpc_omp_task_complete_if0: {
2034     // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2035     // *new_task);
2036     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2037                                 CGM.VoidPtrTy};
2038     auto *FnTy =
2039         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2040     RTLFn = CGM.CreateRuntimeFunction(FnTy,
2041                                       /*Name=*/"__kmpc_omp_task_complete_if0");
2042     break;
2043   }
2044   case OMPRTL__kmpc_ordered: {
2045     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
2046     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2047     auto *FnTy =
2048         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2049     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
2050     break;
2051   }
2052   case OMPRTL__kmpc_end_ordered: {
2053     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
2054     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2055     auto *FnTy =
2056         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2057     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
2058     break;
2059   }
2060   case OMPRTL__kmpc_omp_taskwait: {
2061     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
2062     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2063     auto *FnTy =
2064         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2065     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
2066     break;
2067   }
2068   case OMPRTL__kmpc_taskgroup: {
2069     // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
2070     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2071     auto *FnTy =
2072         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2073     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
2074     break;
2075   }
2076   case OMPRTL__kmpc_end_taskgroup: {
2077     // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
2078     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2079     auto *FnTy =
2080         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2081     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
2082     break;
2083   }
2084   case OMPRTL__kmpc_push_proc_bind: {
2085     // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
2086     // int proc_bind)
2087     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2088     auto *FnTy =
2089         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2090     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
2091     break;
2092   }
2093   case OMPRTL__kmpc_omp_task_with_deps: {
2094     // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
2095     // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
2096     // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
2097     llvm::Type *TypeParams[] = {
2098         getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
2099         CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
2100     auto *FnTy =
2101         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2102     RTLFn =
2103         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
2104     break;
2105   }
2106   case OMPRTL__kmpc_omp_wait_deps: {
2107     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
2108     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
2109     // kmp_depend_info_t *noalias_dep_list);
2110     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2111                                 CGM.Int32Ty,           CGM.VoidPtrTy,
2112                                 CGM.Int32Ty,           CGM.VoidPtrTy};
2113     auto *FnTy =
2114         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2115     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
2116     break;
2117   }
2118   case OMPRTL__kmpc_cancellationpoint: {
2119     // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
2120     // global_tid, kmp_int32 cncl_kind)
2121     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2122     auto *FnTy =
2123         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2124     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
2125     break;
2126   }
2127   case OMPRTL__kmpc_cancel: {
2128     // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
2129     // kmp_int32 cncl_kind)
2130     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2131     auto *FnTy =
2132         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2133     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
2134     break;
2135   }
2136   case OMPRTL__kmpc_push_num_teams: {
    // Build void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
    // kmp_int32 num_teams, kmp_int32 num_threads)
2139     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2140         CGM.Int32Ty};
2141     auto *FnTy =
2142         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2143     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
2144     break;
2145   }
2146   case OMPRTL__kmpc_fork_teams: {
2147     // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
2148     // microtask, ...);
2149     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2150                                 getKmpc_MicroPointerTy()};
2151     auto *FnTy =
2152         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
2153     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
2154     if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
2155       if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
2156         llvm::LLVMContext &Ctx = F->getContext();
2157         llvm::MDBuilder MDB(Ctx);
2158         // Annotate the callback behavior of the __kmpc_fork_teams:
2159         //  - The callback callee is argument number 2 (microtask).
2160         //  - The first two arguments of the callback callee are unknown (-1).
2161         //  - All variadic arguments to the __kmpc_fork_teams are passed to the
2162         //    callback callee.
2163         F->addMetadata(
2164             llvm::LLVMContext::MD_callback,
2165             *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
2166                                         2, {-1, -1},
2167                                         /* VarArgsArePassed */ true)}));
2168       }
2169     }
2170     break;
2171   }
2172   case OMPRTL__kmpc_taskloop: {
2173     // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
2174     // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
2175     // sched, kmp_uint64 grainsize, void *task_dup);
2176     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2177                                 CGM.IntTy,
2178                                 CGM.VoidPtrTy,
2179                                 CGM.IntTy,
2180                                 CGM.Int64Ty->getPointerTo(),
2181                                 CGM.Int64Ty->getPointerTo(),
2182                                 CGM.Int64Ty,
2183                                 CGM.IntTy,
2184                                 CGM.IntTy,
2185                                 CGM.Int64Ty,
2186                                 CGM.VoidPtrTy};
2187     auto *FnTy =
2188         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2189     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
2190     break;
2191   }
2192   case OMPRTL__kmpc_doacross_init: {
2193     // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
2194     // num_dims, struct kmp_dim *dims);
2195     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2196                                 CGM.Int32Ty,
2197                                 CGM.Int32Ty,
2198                                 CGM.VoidPtrTy};
2199     auto *FnTy =
2200         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2201     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
2202     break;
2203   }
2204   case OMPRTL__kmpc_doacross_fini: {
2205     // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
2206     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2207     auto *FnTy =
2208         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2209     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
2210     break;
2211   }
2212   case OMPRTL__kmpc_doacross_post: {
2213     // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
2214     // *vec);
2215     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2216                                 CGM.Int64Ty->getPointerTo()};
2217     auto *FnTy =
2218         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2219     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
2220     break;
2221   }
2222   case OMPRTL__kmpc_doacross_wait: {
2223     // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
2224     // *vec);
2225     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2226                                 CGM.Int64Ty->getPointerTo()};
2227     auto *FnTy =
2228         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2229     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
2230     break;
2231   }
2232   case OMPRTL__kmpc_task_reduction_init: {
2233     // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
2234     // *data);
2235     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
2236     auto *FnTy =
2237         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2238     RTLFn =
2239         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
2240     break;
2241   }
2242   case OMPRTL__kmpc_task_reduction_get_th_data: {
2243     // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
2244     // *d);
2245     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2246     auto *FnTy =
2247         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2248     RTLFn = CGM.CreateRuntimeFunction(
2249         FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
2250     break;
2251   }
2252   case OMPRTL__kmpc_alloc: {
2253     // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t
2254     // al); omp_allocator_handle_t type is void *.
2255     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy};
2256     auto *FnTy =
2257         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2258     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc");
2259     break;
2260   }
2261   case OMPRTL__kmpc_free: {
2262     // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t
2263     // al); omp_allocator_handle_t type is void *.
2264     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2265     auto *FnTy =
2266         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2267     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free");
2268     break;
2269   }
2270   case OMPRTL__kmpc_push_target_tripcount: {
2271     // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
2272     // size);
2273     llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty};
2274     llvm::FunctionType *FnTy =
2275         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2276     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount");
2277     break;
2278   }
2279   case OMPRTL__tgt_target: {
2280     // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
2281     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2282     // *arg_types);
2283     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2284                                 CGM.VoidPtrTy,
2285                                 CGM.Int32Ty,
2286                                 CGM.VoidPtrPtrTy,
2287                                 CGM.VoidPtrPtrTy,
2288                                 CGM.Int64Ty->getPointerTo(),
2289                                 CGM.Int64Ty->getPointerTo()};
2290     auto *FnTy =
2291         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2292     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
2293     break;
2294   }
2295   case OMPRTL__tgt_target_nowait: {
2296     // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
2297     // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2298     // int64_t *arg_types);
2299     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2300                                 CGM.VoidPtrTy,
2301                                 CGM.Int32Ty,
2302                                 CGM.VoidPtrPtrTy,
2303                                 CGM.VoidPtrPtrTy,
2304                                 CGM.Int64Ty->getPointerTo(),
2305                                 CGM.Int64Ty->getPointerTo()};
2306     auto *FnTy =
2307         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2308     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
2309     break;
2310   }
2311   case OMPRTL__tgt_target_teams: {
2312     // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
2313     // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2314     // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2315     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2316                                 CGM.VoidPtrTy,
2317                                 CGM.Int32Ty,
2318                                 CGM.VoidPtrPtrTy,
2319                                 CGM.VoidPtrPtrTy,
2320                                 CGM.Int64Ty->getPointerTo(),
2321                                 CGM.Int64Ty->getPointerTo(),
2322                                 CGM.Int32Ty,
2323                                 CGM.Int32Ty};
2324     auto *FnTy =
2325         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2326     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
2327     break;
2328   }
2329   case OMPRTL__tgt_target_teams_nowait: {
2330     // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
2331     // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
2332     // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2333     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2334                                 CGM.VoidPtrTy,
2335                                 CGM.Int32Ty,
2336                                 CGM.VoidPtrPtrTy,
2337                                 CGM.VoidPtrPtrTy,
2338                                 CGM.Int64Ty->getPointerTo(),
2339                                 CGM.Int64Ty->getPointerTo(),
2340                                 CGM.Int32Ty,
2341                                 CGM.Int32Ty};
2342     auto *FnTy =
2343         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2344     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
2345     break;
2346   }
2347   case OMPRTL__tgt_register_requires: {
2348     // Build void __tgt_register_requires(int64_t flags);
2349     llvm::Type *TypeParams[] = {CGM.Int64Ty};
2350     auto *FnTy =
2351         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2352     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires");
2353     break;
2354   }
2355   case OMPRTL__tgt_register_lib: {
2356     // Build void __tgt_register_lib(__tgt_bin_desc *desc);
2357     QualType ParamTy =
2358         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2359     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2360     auto *FnTy =
2361         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2362     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
2363     break;
2364   }
2365   case OMPRTL__tgt_unregister_lib: {
2366     // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
2367     QualType ParamTy =
2368         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2369     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2370     auto *FnTy =
2371         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2372     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
2373     break;
2374   }
2375   case OMPRTL__tgt_target_data_begin: {
2376     // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
2377     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2378     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2379                                 CGM.Int32Ty,
2380                                 CGM.VoidPtrPtrTy,
2381                                 CGM.VoidPtrPtrTy,
2382                                 CGM.Int64Ty->getPointerTo(),
2383                                 CGM.Int64Ty->getPointerTo()};
2384     auto *FnTy =
2385         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2386     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
2387     break;
2388   }
2389   case OMPRTL__tgt_target_data_begin_nowait: {
2390     // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
2391     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2392     // *arg_types);
2393     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2394                                 CGM.Int32Ty,
2395                                 CGM.VoidPtrPtrTy,
2396                                 CGM.VoidPtrPtrTy,
2397                                 CGM.Int64Ty->getPointerTo(),
2398                                 CGM.Int64Ty->getPointerTo()};
2399     auto *FnTy =
2400         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2401     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
2402     break;
2403   }
2404   case OMPRTL__tgt_target_data_end: {
2405     // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
2406     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2407     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2408                                 CGM.Int32Ty,
2409                                 CGM.VoidPtrPtrTy,
2410                                 CGM.VoidPtrPtrTy,
2411                                 CGM.Int64Ty->getPointerTo(),
2412                                 CGM.Int64Ty->getPointerTo()};
2413     auto *FnTy =
2414         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2415     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
2416     break;
2417   }
2418   case OMPRTL__tgt_target_data_end_nowait: {
2419     // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
2420     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2421     // *arg_types);
2422     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2423                                 CGM.Int32Ty,
2424                                 CGM.VoidPtrPtrTy,
2425                                 CGM.VoidPtrPtrTy,
2426                                 CGM.Int64Ty->getPointerTo(),
2427                                 CGM.Int64Ty->getPointerTo()};
2428     auto *FnTy =
2429         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2430     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
2431     break;
2432   }
2433   case OMPRTL__tgt_target_data_update: {
2434     // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
2435     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2436     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2437                                 CGM.Int32Ty,
2438                                 CGM.VoidPtrPtrTy,
2439                                 CGM.VoidPtrPtrTy,
2440                                 CGM.Int64Ty->getPointerTo(),
2441                                 CGM.Int64Ty->getPointerTo()};
2442     auto *FnTy =
2443         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2444     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
2445     break;
2446   }
2447   case OMPRTL__tgt_target_data_update_nowait: {
2448     // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
2449     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2450     // *arg_types);
2451     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2452                                 CGM.Int32Ty,
2453                                 CGM.VoidPtrPtrTy,
2454                                 CGM.VoidPtrPtrTy,
2455                                 CGM.Int64Ty->getPointerTo(),
2456                                 CGM.Int64Ty->getPointerTo()};
2457     auto *FnTy =
2458         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2459     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
2460     break;
2461   }
2462   }
2463   assert(RTLFn && "Unable to find OpenMP runtime function");
2464   return RTLFn;
2465 }
2466 
2467 llvm::FunctionCallee
2468 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
2469   assert((IVSize == 32 || IVSize == 64) &&
2470          "IV size is not compatible with the omp runtime");
2471   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
2472                                             : "__kmpc_for_static_init_4u")
2473                                 : (IVSigned ? "__kmpc_for_static_init_8"
2474                                             : "__kmpc_for_static_init_8u");
2475   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2476   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2477   llvm::Type *TypeParams[] = {
2478     getIdentTyPointerTy(),                     // loc
2479     CGM.Int32Ty,                               // tid
2480     CGM.Int32Ty,                               // schedtype
2481     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2482     PtrTy,                                     // p_lower
2483     PtrTy,                                     // p_upper
2484     PtrTy,                                     // p_stride
2485     ITy,                                       // incr
2486     ITy                                        // chunk
2487   };
2488   auto *FnTy =
2489       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2490   return CGM.CreateRuntimeFunction(FnTy, Name);
2491 }
2492 
2493 llvm::FunctionCallee
2494 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
2495   assert((IVSize == 32 || IVSize == 64) &&
2496          "IV size is not compatible with the omp runtime");
2497   StringRef Name =
2498       IVSize == 32
2499           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
2500           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
2501   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2502   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
2503                                CGM.Int32Ty,           // tid
2504                                CGM.Int32Ty,           // schedtype
2505                                ITy,                   // lower
2506                                ITy,                   // upper
2507                                ITy,                   // stride
2508                                ITy                    // chunk
2509   };
2510   auto *FnTy =
2511       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2512   return CGM.CreateRuntimeFunction(FnTy, Name);
2513 }
2514 
2515 llvm::FunctionCallee
2516 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
2517   assert((IVSize == 32 || IVSize == 64) &&
2518          "IV size is not compatible with the omp runtime");
2519   StringRef Name =
2520       IVSize == 32
2521           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
2522           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
2523   llvm::Type *TypeParams[] = {
2524       getIdentTyPointerTy(), // loc
2525       CGM.Int32Ty,           // tid
2526   };
2527   auto *FnTy =
2528       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2529   return CGM.CreateRuntimeFunction(FnTy, Name);
2530 }
2531 
2532 llvm::FunctionCallee
2533 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
2534   assert((IVSize == 32 || IVSize == 64) &&
2535          "IV size is not compatible with the omp runtime");
2536   StringRef Name =
2537       IVSize == 32
2538           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
2539           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
2540   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2541   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2542   llvm::Type *TypeParams[] = {
2543     getIdentTyPointerTy(),                     // loc
2544     CGM.Int32Ty,                               // tid
2545     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2546     PtrTy,                                     // p_lower
2547     PtrTy,                                     // p_upper
2548     PtrTy                                      // p_stride
2549   };
2550   auto *FnTy =
2551       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2552   return CGM.CreateRuntimeFunction(FnTy, Name);
2553 }
2554 
2555 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
2556   if (CGM.getLangOpts().OpenMPSimd)
2557     return Address::invalid();
2558   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
2559       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
2560   if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
2561               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
2562                HasRequiresUnifiedSharedMemory))) {
2563     SmallString<64> PtrName;
2564     {
2565       llvm::raw_svector_ostream OS(PtrName);
2566       OS << CGM.getMangledName(GlobalDecl(VD)) << "_decl_tgt_ref_ptr";
2567     }
2568     llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
2569     if (!Ptr) {
2570       QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
2571       Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
2572                                         PtrName);
2573       if (!CGM.getLangOpts().OpenMPIsDevice) {
2574         auto *GV = cast<llvm::GlobalVariable>(Ptr);
2575         GV->setLinkage(llvm::GlobalValue::ExternalLinkage);
2576         GV->setInitializer(CGM.GetAddrOfGlobal(VD));
2577       }
2578       CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ptr));
2579       registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
2580     }
2581     return Address(Ptr, CGM.getContext().getDeclAlign(VD));
2582   }
2583   return Address::invalid();
2584 }
2585 
2586 llvm::Constant *
2587 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
2588   assert(!CGM.getLangOpts().OpenMPUseTLS ||
2589          !CGM.getContext().getTargetInfo().isTLSSupported());
2590   // Lookup the entry, lazily creating it if necessary.
2591   std::string Suffix = getName({"cache", ""});
2592   return getOrCreateInternalVariable(
2593       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
2594 }
2595 
2596 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
2597                                                 const VarDecl *VD,
2598                                                 Address VDAddr,
2599                                                 SourceLocation Loc) {
2600   if (CGM.getLangOpts().OpenMPUseTLS &&
2601       CGM.getContext().getTargetInfo().isTLSSupported())
2602     return VDAddr;
2603 
2604   llvm::Type *VarTy = VDAddr.getElementType();
2605   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2606                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2607                                                        CGM.Int8PtrTy),
2608                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
2609                          getOrCreateThreadPrivateCache(VD)};
2610   return Address(CGF.EmitRuntimeCall(
2611       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2612                  VDAddr.getAlignment());
2613 }
2614 
2615 void CGOpenMPRuntime::emitThreadPrivateVarInit(
2616     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
2617     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
2618   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
2619   // library.
2620   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
2621   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
2622                       OMPLoc);
2623   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
2624   // to register constructor/destructor for variable.
2625   llvm::Value *Args[] = {
2626       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
2627       Ctor, CopyCtor, Dtor};
2628   CGF.EmitRuntimeCall(
2629       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
2630 }
2631 
2632 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
2633     const VarDecl *VD, Address VDAddr, SourceLocation Loc,
2634     bool PerformInit, CodeGenFunction *CGF) {
2635   if (CGM.getLangOpts().OpenMPUseTLS &&
2636       CGM.getContext().getTargetInfo().isTLSSupported())
2637     return nullptr;
2638 
2639   VD = VD->getDefinition(CGM.getContext());
2640   if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
2641     QualType ASTTy = VD->getType();
2642 
2643     llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
2644     const Expr *Init = VD->getAnyInitializer();
2645     if (CGM.getLangOpts().CPlusPlus && PerformInit) {
2646       // Generate function that re-emits the declaration's initializer into the
2647       // threadprivate copy of the variable VD
2648       CodeGenFunction CtorCGF(CGM);
2649       FunctionArgList Args;
2650       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
2651                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
2652                             ImplicitParamDecl::Other);
2653       Args.push_back(&Dst);
2654 
2655       const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
2656           CGM.getContext().VoidPtrTy, Args);
2657       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2658       std::string Name = getName({"__kmpc_global_ctor_", ""});
2659       llvm::Function *Fn =
2660           CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
2661       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
2662                             Args, Loc, Loc);
2663       llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
2664           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
2665           CGM.getContext().VoidPtrTy, Dst.getLocation());
2666       Address Arg = Address(ArgVal, VDAddr.getAlignment());
2667       Arg = CtorCGF.Builder.CreateElementBitCast(
2668           Arg, CtorCGF.ConvertTypeForMem(ASTTy));
2669       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
2670                                /*IsInitializer=*/true);
2671       ArgVal = CtorCGF.EmitLoadOfScalar(
2672           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
2673           CGM.getContext().VoidPtrTy, Dst.getLocation());
2674       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
2675       CtorCGF.FinishFunction();
2676       Ctor = Fn;
2677     }
2678     if (VD->getType().isDestructedType() != QualType::DK_none) {
2679       // Generate function that emits destructor call for the threadprivate copy
2680       // of the variable VD
2681       CodeGenFunction DtorCGF(CGM);
2682       FunctionArgList Args;
2683       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
2684                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
2685                             ImplicitParamDecl::Other);
2686       Args.push_back(&Dst);
2687 
2688       const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
2689           CGM.getContext().VoidTy, Args);
2690       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2691       std::string Name = getName({"__kmpc_global_dtor_", ""});
2692       llvm::Function *Fn =
2693           CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
2694       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
2695       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
2696                             Loc, Loc);
2697       // Create a scope with an artificial location for the body of this function.
2698       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
2699       llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
2700           DtorCGF.GetAddrOfLocalVar(&Dst),
2701           /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
2702       DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
2703                           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
2704                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
2705       DtorCGF.FinishFunction();
2706       Dtor = Fn;
2707     }
2708     // Do not emit init function if it is not required.
2709     if (!Ctor && !Dtor)
2710       return nullptr;
2711 
2712     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2713     auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
2714                                                /*isVarArg=*/false)
2715                            ->getPointerTo();
2716     // Copying constructor for the threadprivate variable.
2717     // Must be NULL - reserved by runtime, but currently it requires that this
2718     // parameter is always NULL. Otherwise it fires assertion.
2719     CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
2720     if (Ctor == nullptr) {
2721       auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
2722                                              /*isVarArg=*/false)
2723                          ->getPointerTo();
2724       Ctor = llvm::Constant::getNullValue(CtorTy);
2725     }
2726     if (Dtor == nullptr) {
2727       auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
2728                                              /*isVarArg=*/false)
2729                          ->getPointerTo();
2730       Dtor = llvm::Constant::getNullValue(DtorTy);
2731     }
2732     if (!CGF) {
2733       auto *InitFunctionTy =
2734           llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
2735       std::string Name = getName({"__omp_threadprivate_init_", ""});
2736       llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
2737           InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
2738       CodeGenFunction InitCGF(CGM);
2739       FunctionArgList ArgList;
2740       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
2741                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
2742                             Loc, Loc);
2743       emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
2744       InitCGF.FinishFunction();
2745       return InitFunction;
2746     }
2747     emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
2748   }
2749   return nullptr;
2750 }
2751 
2752 /// Obtain information that uniquely identifies a target entry. This
2753 /// consists of the file and device IDs as well as line number associated with
2754 /// the relevant entry source location.
2755 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
2756                                      unsigned &DeviceID, unsigned &FileID,
2757                                      unsigned &LineNum) {
2758   SourceManager &SM = C.getSourceManager();
2759 
2760   // The loc should be always valid and have a file ID (the user cannot use
2761   // #pragma directives in macros)
2762 
2763   assert(Loc.isValid() && "Source location is expected to be always valid.");
2764 
2765   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
2766   assert(PLoc.isValid() && "Source location is expected to be always valid.");
2767 
2768   llvm::sys::fs::UniqueID ID;
2769   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
2770     SM.getDiagnostics().Report(diag::err_cannot_open_file)
2771         << PLoc.getFilename() << EC.message();
2772 
2773   DeviceID = ID.getDevice();
2774   FileID = ID.getFile();
2775   LineNum = PLoc.getLine();
2776 }
2777 
2778 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
2779                                                      llvm::GlobalVariable *Addr,
2780                                                      bool PerformInit) {
2781   Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
2782       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
2783   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
2784       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
2785        HasRequiresUnifiedSharedMemory))
2786     return CGM.getLangOpts().OpenMPIsDevice;
2787   VD = VD->getDefinition(CGM.getContext());
2788   if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
2789     return CGM.getLangOpts().OpenMPIsDevice;
2790 
2791   QualType ASTTy = VD->getType();
2792 
2793   SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
2794   // Produce the unique prefix to identify the new target regions. We use
2795   // the source location of the variable declaration which we know to not
2796   // conflict with any target region.
2797   unsigned DeviceID;
2798   unsigned FileID;
2799   unsigned Line;
2800   getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
2801   SmallString<128> Buffer, Out;
2802   {
2803     llvm::raw_svector_ostream OS(Buffer);
2804     OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
2805        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
2806   }
2807 
2808   const Expr *Init = VD->getAnyInitializer();
2809   if (CGM.getLangOpts().CPlusPlus && PerformInit) {
2810     llvm::Constant *Ctor;
2811     llvm::Constant *ID;
2812     if (CGM.getLangOpts().OpenMPIsDevice) {
2813       // Generate function that re-emits the declaration's initializer into
2814       // the threadprivate copy of the variable VD
2815       CodeGenFunction CtorCGF(CGM);
2816 
2817       const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
2818       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2819       llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
2820           FTy, Twine(Buffer, "_ctor"), FI, Loc);
2821       auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
2822       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
2823                             FunctionArgList(), Loc, Loc);
2824       auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
2825       CtorCGF.EmitAnyExprToMem(Init,
2826                                Address(Addr, CGM.getContext().getDeclAlign(VD)),
2827                                Init->getType().getQualifiers(),
2828                                /*IsInitializer=*/true);
2829       CtorCGF.FinishFunction();
2830       Ctor = Fn;
2831       ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
2832       CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
2833     } else {
2834       Ctor = new llvm::GlobalVariable(
2835           CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
2836           llvm::GlobalValue::PrivateLinkage,
2837           llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
2838       ID = Ctor;
2839     }
2840 
2841     // Register the information for the entry associated with the constructor.
2842     Out.clear();
2843     OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
2844         DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
2845         ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
2846   }
2847   if (VD->getType().isDestructedType() != QualType::DK_none) {
2848     llvm::Constant *Dtor;
2849     llvm::Constant *ID;
2850     if (CGM.getLangOpts().OpenMPIsDevice) {
2851       // Generate function that emits destructor call for the threadprivate
2852       // copy of the variable VD
2853       CodeGenFunction DtorCGF(CGM);
2854 
2855       const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
2856       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2857       llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
2858           FTy, Twine(Buffer, "_dtor"), FI, Loc);
2859       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
2860       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
2861                             FunctionArgList(), Loc, Loc);
2862       // Create a scope with an artificial location for the body of this
2863       // function.
2864       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
2865       DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
2866                           ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
2867                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
2868       DtorCGF.FinishFunction();
2869       Dtor = Fn;
2870       ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
2871       CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
2872     } else {
2873       Dtor = new llvm::GlobalVariable(
2874           CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
2875           llvm::GlobalValue::PrivateLinkage,
2876           llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
2877       ID = Dtor;
2878     }
2879     // Register the information for the entry associated with the destructor.
2880     Out.clear();
2881     OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
2882         DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
2883         ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
2884   }
2885   return CGM.getLangOpts().OpenMPIsDevice;
2886 }
2887 
2888 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
2889                                                           QualType VarType,
2890                                                           StringRef Name) {
2891   std::string Suffix = getName({"artificial", ""});
2892   std::string CacheSuffix = getName({"cache", ""});
2893   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2894   llvm::Value *GAddr =
2895       getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
2896   llvm::Value *Args[] = {
2897       emitUpdateLocation(CGF, SourceLocation()),
2898       getThreadID(CGF, SourceLocation()),
2899       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2900       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2901                                 /*IsSigned=*/false),
2902       getOrCreateInternalVariable(
2903           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
2904   return Address(
2905       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2906           CGF.EmitRuntimeCall(
2907               createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2908           VarLVType->getPointerTo(/*AddrSpace=*/0)),
2909       CGM.getPointerAlign());
2910 }
2911 
2912 void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
2913                                       const RegionCodeGenTy &ThenGen,
2914                                       const RegionCodeGenTy &ElseGen) {
2915   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2916 
2917   // If the condition constant folds and can be elided, try to avoid emitting
2918   // the condition and the dead arm of the if/else.
2919   bool CondConstant;
2920   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2921     if (CondConstant)
2922       ThenGen(CGF);
2923     else
2924       ElseGen(CGF);
2925     return;
2926   }
2927 
2928   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2929   // emit the conditional branch.
2930   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2931   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2932   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2933   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2934 
2935   // Emit the 'then' code.
2936   CGF.EmitBlock(ThenBlock);
2937   ThenGen(CGF);
2938   CGF.EmitBranch(ContBlock);
2939   // Emit the 'else' code if present.
2940   // There is no need to emit line number for unconditional branch.
2941   (void)ApplyDebugLocation::CreateEmpty(CGF);
2942   CGF.EmitBlock(ElseBlock);
2943   ElseGen(CGF);
2944   // There is no need to emit line number for unconditional branch.
2945   (void)ApplyDebugLocation::CreateEmpty(CGF);
2946   CGF.EmitBranch(ContBlock);
2947   // Emit the continuation block for code after the if.
2948   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2949 }
2950 
2951 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2952                                        llvm::Function *OutlinedFn,
2953                                        ArrayRef<llvm::Value *> CapturedVars,
2954                                        const Expr *IfCond) {
2955   if (!CGF.HaveInsertPoint())
2956     return;
2957   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2958   auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
2959                                                      PrePostActionTy &) {
2960     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2961     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2962     llvm::Value *Args[] = {
2963         RTLoc,
2964         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2965         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2966     llvm::SmallVector<llvm::Value *, 16> RealArgs;
2967     RealArgs.append(std::begin(Args), std::end(Args));
2968     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2969 
2970     llvm::FunctionCallee RTLFn =
2971         RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
2972     CGF.EmitRuntimeCall(RTLFn, RealArgs);
2973   };
2974   auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
2975                                                           PrePostActionTy &) {
2976     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2977     llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2978     // Build calls:
2979     // __kmpc_serialized_parallel(&Loc, GTid);
2980     llvm::Value *Args[] = {RTLoc, ThreadID};
2981     CGF.EmitRuntimeCall(
2982         RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
2983 
2984     // OutlinedFn(&GTid, &zero, CapturedStruct);
2985     Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2986                                                         /*Name*/ ".zero.addr");
2987     CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
2988     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2989     // ThreadId for serialized parallels is 0.
2990     OutlinedFnArgs.push_back(ZeroAddr.getPointer());
2991     OutlinedFnArgs.push_back(ZeroAddr.getPointer());
2992     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2993     RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2994 
2995     // __kmpc_end_serialized_parallel(&Loc, GTid);
2996     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2997     CGF.EmitRuntimeCall(
2998         RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
2999         EndArgs);
3000   };
3001   if (IfCond) {
3002     emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
3003   } else {
3004     RegionCodeGenTy ThenRCG(ThenGen);
3005     ThenRCG(CGF);
3006   }
3007 }
3008 
3009 // If we're inside an (outlined) parallel region, use the region info's
3010 // thread-ID variable (it is passed in a first argument of the outlined function
3011 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
3012 // regular serial code region, get thread ID by calling kmp_int32
3013 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
3014 // return the address of that temp.
3015 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
3016                                              SourceLocation Loc) {
3017   if (auto *OMPRegionInfo =
3018           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3019     if (OMPRegionInfo->getThreadIDVariable())
3020       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
3021 
3022   llvm::Value *ThreadID = getThreadID(CGF, Loc);
3023   QualType Int32Ty =
3024       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
3025   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
3026   CGF.EmitStoreOfScalar(ThreadID,
3027                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
3028 
3029   return ThreadIDTemp;
3030 }
3031 
3032 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
3033     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
3034   SmallString<256> Buffer;
3035   llvm::raw_svector_ostream Out(Buffer);
3036   Out << Name;
3037   StringRef RuntimeName = Out.str();
3038   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
3039   if (Elem.second) {
3040     assert(Elem.second->getType()->getPointerElementType() == Ty &&
3041            "OMP internal variable has different type than requested");
3042     return &*Elem.second;
3043   }
3044 
3045   return Elem.second = new llvm::GlobalVariable(
3046              CGM.getModule(), Ty, /*IsConstant*/ false,
3047              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
3048              Elem.first(), /*InsertBefore=*/nullptr,
3049              llvm::GlobalValue::NotThreadLocal, AddressSpace);
3050 }
3051 
3052 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
3053   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
3054   std::string Name = getName({Prefix, "var"});
3055   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
3056 }
3057 
3058 namespace {
3059 /// Common pre(post)-action for different OpenMP constructs.
3060 class CommonActionTy final : public PrePostActionTy {
3061   llvm::FunctionCallee EnterCallee;
3062   ArrayRef<llvm::Value *> EnterArgs;
3063   llvm::FunctionCallee ExitCallee;
3064   ArrayRef<llvm::Value *> ExitArgs;
3065   bool Conditional;
3066   llvm::BasicBlock *ContBlock = nullptr;
3067 
3068 public:
3069   CommonActionTy(llvm::FunctionCallee EnterCallee,
3070                  ArrayRef<llvm::Value *> EnterArgs,
3071                  llvm::FunctionCallee ExitCallee,
3072                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
3073       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
3074         ExitArgs(ExitArgs), Conditional(Conditional) {}
3075   void Enter(CodeGenFunction &CGF) override {
3076     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
3077     if (Conditional) {
3078       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
3079       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
3080       ContBlock = CGF.createBasicBlock("omp_if.end");
3081       // Generate the branch (If-stmt)
3082       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
3083       CGF.EmitBlock(ThenBlock);
3084     }
3085   }
3086   void Done(CodeGenFunction &CGF) {
3087     // Emit the rest of blocks/branches
3088     CGF.EmitBranch(ContBlock);
3089     CGF.EmitBlock(ContBlock, true);
3090   }
3091   void Exit(CodeGenFunction &CGF) override {
3092     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
3093   }
3094 };
3095 } // anonymous namespace
3096 
3097 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
3098                                          StringRef CriticalName,
3099                                          const RegionCodeGenTy &CriticalOpGen,
3100                                          SourceLocation Loc, const Expr *Hint) {
3101   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
3102   // CriticalOpGen();
3103   // __kmpc_end_critical(ident_t *, gtid, Lock);
3104   // Prepare arguments and build a call to __kmpc_critical
3105   if (!CGF.HaveInsertPoint())
3106     return;
3107   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3108                          getCriticalRegionLock(CriticalName)};
3109   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
3110                                                 std::end(Args));
3111   if (Hint) {
3112     EnterArgs.push_back(CGF.Builder.CreateIntCast(
3113         CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
3114   }
3115   CommonActionTy Action(
3116       createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
3117                                  : OMPRTL__kmpc_critical),
3118       EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
3119   CriticalOpGen.setAction(Action);
3120   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
3121 }
3122 
3123 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
3124                                        const RegionCodeGenTy &MasterOpGen,
3125                                        SourceLocation Loc) {
3126   if (!CGF.HaveInsertPoint())
3127     return;
3128   // if(__kmpc_master(ident_t *, gtid)) {
3129   //   MasterOpGen();
3130   //   __kmpc_end_master(ident_t *, gtid);
3131   // }
3132   // Prepare arguments and build a call to __kmpc_master
3133   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3134   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
3135                         createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
3136                         /*Conditional=*/true);
3137   MasterOpGen.setAction(Action);
3138   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
3139   Action.Done(CGF);
3140 }
3141 
3142 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
3143                                         SourceLocation Loc) {
3144   if (!CGF.HaveInsertPoint())
3145     return;
3146   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
3147   llvm::Value *Args[] = {
3148       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3149       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
3150   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
3151   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3152     Region->emitUntiedSwitch(CGF);
3153 }
3154 
3155 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
3156                                           const RegionCodeGenTy &TaskgroupOpGen,
3157                                           SourceLocation Loc) {
3158   if (!CGF.HaveInsertPoint())
3159     return;
3160   // __kmpc_taskgroup(ident_t *, gtid);
3161   // TaskgroupOpGen();
3162   // __kmpc_end_taskgroup(ident_t *, gtid);
3163   // Prepare arguments and build a call to __kmpc_taskgroup
3164   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3165   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
3166                         createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
3167                         Args);
3168   TaskgroupOpGen.setAction(Action);
3169   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
3170 }
3171 
3172 /// Given an array of pointers to variables, project the address of a
3173 /// given variable.
3174 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
3175                                       unsigned Index, const VarDecl *Var) {
3176   // Pull out the pointer to the variable.
3177   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
3178   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
3179 
3180   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
3181   Addr = CGF.Builder.CreateElementBitCast(
3182       Addr, CGF.ConvertTypeForMem(Var->getType()));
3183   return Addr;
3184 }
3185 
3186 static llvm::Value *emitCopyprivateCopyFunction(
3187     CodeGenModule &CGM, llvm::Type *ArgsType,
3188     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
3189     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
3190     SourceLocation Loc) {
3191   ASTContext &C = CGM.getContext();
3192   // void copy_func(void *LHSArg, void *RHSArg);
3193   FunctionArgList Args;
3194   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
3195                            ImplicitParamDecl::Other);
3196   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
3197                            ImplicitParamDecl::Other);
3198   Args.push_back(&LHSArg);
3199   Args.push_back(&RHSArg);
3200   const auto &CGFI =
3201       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3202   std::string Name =
3203       CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
3204   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
3205                                     llvm::GlobalValue::InternalLinkage, Name,
3206                                     &CGM.getModule());
3207   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
3208   Fn->setDoesNotRecurse();
3209   CodeGenFunction CGF(CGM);
3210   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
3211   // Dest = (void*[n])(LHSArg);
3212   // Src = (void*[n])(RHSArg);
3213   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3214       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
3215       ArgsType), CGF.getPointerAlign());
3216   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3217       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
3218       ArgsType), CGF.getPointerAlign());
3219   // *(Type0*)Dst[0] = *(Type0*)Src[0];
3220   // *(Type1*)Dst[1] = *(Type1*)Src[1];
3221   // ...
3222   // *(Typen*)Dst[n] = *(Typen*)Src[n];
3223   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
3224     const auto *DestVar =
3225         cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
3226     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
3227 
3228     const auto *SrcVar =
3229         cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
3230     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
3231 
3232     const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
3233     QualType Type = VD->getType();
3234     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
3235   }
3236   CGF.FinishFunction();
3237   return Fn;
3238 }
3239 
3240 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
3241                                        const RegionCodeGenTy &SingleOpGen,
3242                                        SourceLocation Loc,
3243                                        ArrayRef<const Expr *> CopyprivateVars,
3244                                        ArrayRef<const Expr *> SrcExprs,
3245                                        ArrayRef<const Expr *> DstExprs,
3246                                        ArrayRef<const Expr *> AssignmentOps) {
3247   if (!CGF.HaveInsertPoint())
3248     return;
3249   assert(CopyprivateVars.size() == SrcExprs.size() &&
3250          CopyprivateVars.size() == DstExprs.size() &&
3251          CopyprivateVars.size() == AssignmentOps.size());
3252   ASTContext &C = CGM.getContext();
3253   // int32 did_it = 0;
3254   // if(__kmpc_single(ident_t *, gtid)) {
3255   //   SingleOpGen();
3256   //   __kmpc_end_single(ident_t *, gtid);
3257   //   did_it = 1;
3258   // }
3259   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
3260   // <copy_func>, did_it);
3261 
3262   Address DidIt = Address::invalid();
3263   if (!CopyprivateVars.empty()) {
3264     // int32 did_it = 0;
3265     QualType KmpInt32Ty =
3266         C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3267     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
3268     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
3269   }
3270   // Prepare arguments and build a call to __kmpc_single
3271   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3272   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
3273                         createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
3274                         /*Conditional=*/true);
3275   SingleOpGen.setAction(Action);
3276   emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
3277   if (DidIt.isValid()) {
3278     // did_it = 1;
3279     CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
3280   }
3281   Action.Done(CGF);
3282   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
3283   // <copy_func>, did_it);
3284   if (DidIt.isValid()) {
3285     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
3286     QualType CopyprivateArrayTy =
3287         C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
3288                                /*IndexTypeQuals=*/0);
3289     // Create a list of all private variables for copyprivate.
3290     Address CopyprivateList =
3291         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
3292     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
3293       Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
3294       CGF.Builder.CreateStore(
3295           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3296               CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
3297           Elem);
3298     }
3299     // Build function that copies private values from single region to all other
3300     // threads in the corresponding parallel region.
3301     llvm::Value *CpyFn = emitCopyprivateCopyFunction(
3302         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
3303         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
3304     llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
3305     Address CL =
3306       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
3307                                                       CGF.VoidPtrTy);
3308     llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
3309     llvm::Value *Args[] = {
3310         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
3311         getThreadID(CGF, Loc),        // i32 <gtid>
3312         BufSize,                      // size_t <buf_size>
3313         CL.getPointer(),              // void *<copyprivate list>
3314         CpyFn,                        // void (*) (void *, void *) <copy_func>
3315         DidItVal                      // i32 did_it
3316     };
3317     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
3318   }
3319 }
3320 
3321 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
3322                                         const RegionCodeGenTy &OrderedOpGen,
3323                                         SourceLocation Loc, bool IsThreads) {
3324   if (!CGF.HaveInsertPoint())
3325     return;
3326   // __kmpc_ordered(ident_t *, gtid);
3327   // OrderedOpGen();
3328   // __kmpc_end_ordered(ident_t *, gtid);
3329   // Prepare arguments and build a call to __kmpc_ordered
3330   if (IsThreads) {
3331     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3332     CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
3333                           createRuntimeFunction(OMPRTL__kmpc_end_ordered),
3334                           Args);
3335     OrderedOpGen.setAction(Action);
3336     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3337     return;
3338   }
3339   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3340 }
3341 
3342 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
3343   unsigned Flags;
3344   if (Kind == OMPD_for)
3345     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
3346   else if (Kind == OMPD_sections)
3347     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
3348   else if (Kind == OMPD_single)
3349     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
3350   else if (Kind == OMPD_barrier)
3351     Flags = OMP_IDENT_BARRIER_EXPL;
3352   else
3353     Flags = OMP_IDENT_BARRIER_IMPL;
3354   return Flags;
3355 }
3356 
3357 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
3358     CodeGenFunction &CGF, const OMPLoopDirective &S,
3359     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
3360   // Check if the loop directive is actually a doacross loop directive. In this
3361   // case choose static, 1 schedule.
3362   if (llvm::any_of(
3363           S.getClausesOfKind<OMPOrderedClause>(),
3364           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
3365     ScheduleKind = OMPC_SCHEDULE_static;
3366     // Chunk size is 1 in this case.
3367     llvm::APInt ChunkSize(32, 1);
3368     ChunkExpr = IntegerLiteral::Create(
3369         CGF.getContext(), ChunkSize,
3370         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
3371         SourceLocation());
3372   }
3373 }
3374 
3375 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
3376                                       OpenMPDirectiveKind Kind, bool EmitChecks,
3377                                       bool ForceSimpleCall) {
3378   if (!CGF.HaveInsertPoint())
3379     return;
3380   // Build call __kmpc_cancel_barrier(loc, thread_id);
3381   // Build call __kmpc_barrier(loc, thread_id);
3382   unsigned Flags = getDefaultFlagsForBarriers(Kind);
3383   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
3384   // thread_id);
3385   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
3386                          getThreadID(CGF, Loc)};
3387   if (auto *OMPRegionInfo =
3388           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
3389     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
3390       llvm::Value *Result = CGF.EmitRuntimeCall(
3391           createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
3392       if (EmitChecks) {
3393         // if (__kmpc_cancel_barrier()) {
3394         //   exit from construct;
3395         // }
3396         llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
3397         llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
3398         llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
3399         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
3400         CGF.EmitBlock(ExitBB);
3401         //   exit from construct;
3402         CodeGenFunction::JumpDest CancelDestination =
3403             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
3404         CGF.EmitBranchThroughCleanup(CancelDestination);
3405         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
3406       }
3407       return;
3408     }
3409   }
3410   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
3411 }
3412 
3413 /// Map the OpenMP loop schedule to the runtime enumeration.
3414 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
3415                                           bool Chunked, bool Ordered) {
3416   switch (ScheduleKind) {
3417   case OMPC_SCHEDULE_static:
3418     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
3419                    : (Ordered ? OMP_ord_static : OMP_sch_static);
3420   case OMPC_SCHEDULE_dynamic:
3421     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
3422   case OMPC_SCHEDULE_guided:
3423     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
3424   case OMPC_SCHEDULE_runtime:
3425     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
3426   case OMPC_SCHEDULE_auto:
3427     return Ordered ? OMP_ord_auto : OMP_sch_auto;
3428   case OMPC_SCHEDULE_unknown:
3429     assert(!Chunked && "chunk was specified but schedule kind not known");
3430     return Ordered ? OMP_ord_static : OMP_sch_static;
3431   }
3432   llvm_unreachable("Unexpected runtime schedule");
3433 }
3434 
3435 /// Map the OpenMP distribute schedule to the runtime enumeration.
3436 static OpenMPSchedType
3437 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
3438   // only static is allowed for dist_schedule
3439   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
3440 }
3441 
3442 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
3443                                          bool Chunked) const {
3444   OpenMPSchedType Schedule =
3445       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3446   return Schedule == OMP_sch_static;
3447 }
3448 
3449 bool CGOpenMPRuntime::isStaticNonchunked(
3450     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3451   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3452   return Schedule == OMP_dist_sch_static;
3453 }
3454 
3455 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
3456                                       bool Chunked) const {
3457   OpenMPSchedType Schedule =
3458       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3459   return Schedule == OMP_sch_static_chunked;
3460 }
3461 
3462 bool CGOpenMPRuntime::isStaticChunked(
3463     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3464   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3465   return Schedule == OMP_dist_sch_static_chunked;
3466 }
3467 
3468 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
3469   OpenMPSchedType Schedule =
3470       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
3471   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
3472   return Schedule != OMP_sch_static;
3473 }
3474 
3475 static int addMonoNonMonoModifier(OpenMPSchedType Schedule,
3476                                   OpenMPScheduleClauseModifier M1,
3477                                   OpenMPScheduleClauseModifier M2) {
3478   int Modifier = 0;
3479   switch (M1) {
3480   case OMPC_SCHEDULE_MODIFIER_monotonic:
3481     Modifier = OMP_sch_modifier_monotonic;
3482     break;
3483   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3484     Modifier = OMP_sch_modifier_nonmonotonic;
3485     break;
3486   case OMPC_SCHEDULE_MODIFIER_simd:
3487     if (Schedule == OMP_sch_static_chunked)
3488       Schedule = OMP_sch_static_balanced_chunked;
3489     break;
3490   case OMPC_SCHEDULE_MODIFIER_last:
3491   case OMPC_SCHEDULE_MODIFIER_unknown:
3492     break;
3493   }
3494   switch (M2) {
3495   case OMPC_SCHEDULE_MODIFIER_monotonic:
3496     Modifier = OMP_sch_modifier_monotonic;
3497     break;
3498   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3499     Modifier = OMP_sch_modifier_nonmonotonic;
3500     break;
3501   case OMPC_SCHEDULE_MODIFIER_simd:
3502     if (Schedule == OMP_sch_static_chunked)
3503       Schedule = OMP_sch_static_balanced_chunked;
3504     break;
3505   case OMPC_SCHEDULE_MODIFIER_last:
3506   case OMPC_SCHEDULE_MODIFIER_unknown:
3507     break;
3508   }
3509   return Schedule | Modifier;
3510 }
3511 
3512 void CGOpenMPRuntime::emitForDispatchInit(
3513     CodeGenFunction &CGF, SourceLocation Loc,
3514     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
3515     bool Ordered, const DispatchRTInput &DispatchValues) {
3516   if (!CGF.HaveInsertPoint())
3517     return;
3518   OpenMPSchedType Schedule = getRuntimeSchedule(
3519       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
3520   assert(Ordered ||
3521          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
3522           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
3523           Schedule != OMP_sch_static_balanced_chunked));
3524   // Call __kmpc_dispatch_init(
3525   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
3526   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
3527   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
3528 
3529   // If the Chunk was not specified in the clause - use default value 1.
3530   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
3531                                             : CGF.Builder.getIntN(IVSize, 1);
3532   llvm::Value *Args[] = {
3533       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3534       CGF.Builder.getInt32(addMonoNonMonoModifier(
3535           Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
3536       DispatchValues.LB,                                // Lower
3537       DispatchValues.UB,                                // Upper
3538       CGF.Builder.getIntN(IVSize, 1),                   // Stride
3539       Chunk                                             // Chunk
3540   };
3541   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
3542 }
3543 
3544 static void emitForStaticInitCall(
3545     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
3546     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
3547     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
3548     const CGOpenMPRuntime::StaticRTInput &Values) {
3549   if (!CGF.HaveInsertPoint())
3550     return;
3551 
3552   assert(!Values.Ordered);
3553   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
3554          Schedule == OMP_sch_static_balanced_chunked ||
3555          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
3556          Schedule == OMP_dist_sch_static ||
3557          Schedule == OMP_dist_sch_static_chunked);
3558 
3559   // Call __kmpc_for_static_init(
3560   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
3561   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
3562   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
3563   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
3564   llvm::Value *Chunk = Values.Chunk;
3565   if (Chunk == nullptr) {
3566     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
3567             Schedule == OMP_dist_sch_static) &&
3568            "expected static non-chunked schedule");
3569     // If the Chunk was not specified in the clause - use default value 1.
3570     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
3571   } else {
3572     assert((Schedule == OMP_sch_static_chunked ||
3573             Schedule == OMP_sch_static_balanced_chunked ||
3574             Schedule == OMP_ord_static_chunked ||
3575             Schedule == OMP_dist_sch_static_chunked) &&
3576            "expected static chunked schedule");
3577   }
3578   llvm::Value *Args[] = {
3579       UpdateLocation,
3580       ThreadId,
3581       CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1,
3582                                                   M2)), // Schedule type
3583       Values.IL.getPointer(),                           // &isLastIter
3584       Values.LB.getPointer(),                           // &LB
3585       Values.UB.getPointer(),                           // &UB
3586       Values.ST.getPointer(),                           // &Stride
3587       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
3588       Chunk                                             // Chunk
3589   };
3590   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
3591 }
3592 
3593 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
3594                                         SourceLocation Loc,
3595                                         OpenMPDirectiveKind DKind,
3596                                         const OpenMPScheduleTy &ScheduleKind,
3597                                         const StaticRTInput &Values) {
3598   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
3599       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
3600   assert(isOpenMPWorksharingDirective(DKind) &&
3601          "Expected loop-based or sections-based directive.");
3602   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
3603                                              isOpenMPLoopDirective(DKind)
3604                                                  ? OMP_IDENT_WORK_LOOP
3605                                                  : OMP_IDENT_WORK_SECTIONS);
3606   llvm::Value *ThreadId = getThreadID(CGF, Loc);
3607   llvm::FunctionCallee StaticInitFunction =
3608       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3609   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3610                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
3611 }
3612 
3613 void CGOpenMPRuntime::emitDistributeStaticInit(
3614     CodeGenFunction &CGF, SourceLocation Loc,
3615     OpenMPDistScheduleClauseKind SchedKind,
3616     const CGOpenMPRuntime::StaticRTInput &Values) {
3617   OpenMPSchedType ScheduleNum =
3618       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
3619   llvm::Value *UpdatedLocation =
3620       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
3621   llvm::Value *ThreadId = getThreadID(CGF, Loc);
3622   llvm::FunctionCallee StaticInitFunction =
3623       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3624   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3625                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
3626                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
3627 }
3628 
3629 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
3630                                           SourceLocation Loc,
3631                                           OpenMPDirectiveKind DKind) {
3632   if (!CGF.HaveInsertPoint())
3633     return;
3634   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
3635   llvm::Value *Args[] = {
3636       emitUpdateLocation(CGF, Loc,
3637                          isOpenMPDistributeDirective(DKind)
3638                              ? OMP_IDENT_WORK_DISTRIBUTE
3639                              : isOpenMPLoopDirective(DKind)
3640                                    ? OMP_IDENT_WORK_LOOP
3641                                    : OMP_IDENT_WORK_SECTIONS),
3642       getThreadID(CGF, Loc)};
3643   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
3644                       Args);
3645 }
3646 
3647 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
3648                                                  SourceLocation Loc,
3649                                                  unsigned IVSize,
3650                                                  bool IVSigned) {
3651   if (!CGF.HaveInsertPoint())
3652     return;
3653   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
3654   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3655   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
3656 }
3657 
3658 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
3659                                           SourceLocation Loc, unsigned IVSize,
3660                                           bool IVSigned, Address IL,
3661                                           Address LB, Address UB,
3662                                           Address ST) {
3663   // Call __kmpc_dispatch_next(
3664   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
3665   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
3666   //          kmp_int[32|64] *p_stride);
3667   llvm::Value *Args[] = {
3668       emitUpdateLocation(CGF, Loc),
3669       getThreadID(CGF, Loc),
3670       IL.getPointer(), // &isLastIter
3671       LB.getPointer(), // &Lower
3672       UB.getPointer(), // &Upper
3673       ST.getPointer()  // &Stride
3674   };
3675   llvm::Value *Call =
3676       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
3677   return CGF.EmitScalarConversion(
3678       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
3679       CGF.getContext().BoolTy, Loc);
3680 }
3681 
3682 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
3683                                            llvm::Value *NumThreads,
3684                                            SourceLocation Loc) {
3685   if (!CGF.HaveInsertPoint())
3686     return;
3687   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
3688   llvm::Value *Args[] = {
3689       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3690       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
3691   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
3692                       Args);
3693 }
3694 
3695 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
3696                                          OpenMPProcBindClauseKind ProcBind,
3697                                          SourceLocation Loc) {
3698   if (!CGF.HaveInsertPoint())
3699     return;
3700   // Constants for proc bind value accepted by the runtime.
3701   enum ProcBindTy {
3702     ProcBindFalse = 0,
3703     ProcBindTrue,
3704     ProcBindMaster,
3705     ProcBindClose,
3706     ProcBindSpread,
3707     ProcBindIntel,
3708     ProcBindDefault
3709   } RuntimeProcBind;
3710   switch (ProcBind) {
3711   case OMPC_PROC_BIND_master:
3712     RuntimeProcBind = ProcBindMaster;
3713     break;
3714   case OMPC_PROC_BIND_close:
3715     RuntimeProcBind = ProcBindClose;
3716     break;
3717   case OMPC_PROC_BIND_spread:
3718     RuntimeProcBind = ProcBindSpread;
3719     break;
3720   case OMPC_PROC_BIND_unknown:
3721     llvm_unreachable("Unsupported proc_bind value.");
3722   }
3723   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
3724   llvm::Value *Args[] = {
3725       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3726       llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
3727   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
3728 }
3729 
3730 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
3731                                 SourceLocation Loc) {
3732   if (!CGF.HaveInsertPoint())
3733     return;
3734   // Build call void __kmpc_flush(ident_t *loc)
3735   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
3736                       emitUpdateLocation(CGF, Loc));
3737 }
3738 
namespace {
/// Indexes of fields for type kmp_task_t.
///
/// The enumerators are consecutive, starting at zero; the values are spelled
/// out so the index of each field is visible at a glance.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds = 0,
  /// Task routine.
  KmpTaskTRoutine = 1,
  /// Partition id for the untied tasks.
  KmpTaskTPartId = 2,
  /// Function with call of destructors for private variables.
  Data1 = 3,
  /// Task priority.
  Data2 = 4,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound = 5,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound = 6,
  /// (Taskloops only) Stride.
  KmpTaskTStride = 7,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter = 8,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions = 9,
};
} // anonymous namespace
3764 
3765 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3766   return OffloadEntriesTargetRegion.empty() &&
3767          OffloadEntriesDeviceGlobalVar.empty();
3768 }
3769 
3770 /// Initialize target region entry.
3771 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3772     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3773                                     StringRef ParentName, unsigned LineNum,
3774                                     unsigned Order) {
3775   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3776                                              "only required for the device "
3777                                              "code generation.");
3778   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3779       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3780                                    OMPTargetRegionEntryTargetRegion);
3781   ++OffloadingEntriesNum;
3782 }
3783 
3784 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3785     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3786                                   StringRef ParentName, unsigned LineNum,
3787                                   llvm::Constant *Addr, llvm::Constant *ID,
3788                                   OMPTargetRegionEntryKind Flags) {
3789   // If we are emitting code for a target, the entry is already initialized,
3790   // only has to be registered.
3791   if (CGM.getLangOpts().OpenMPIsDevice) {
3792     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
3793       unsigned DiagID = CGM.getDiags().getCustomDiagID(
3794           DiagnosticsEngine::Error,
3795           "Unable to find target region on line '%0' in the device code.");
3796       CGM.getDiags().Report(DiagID) << LineNum;
3797       return;
3798     }
3799     auto &Entry =
3800         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3801     assert(Entry.isValid() && "Entry not initialized!");
3802     Entry.setAddress(Addr);
3803     Entry.setID(ID);
3804     Entry.setFlags(Flags);
3805   } else {
3806     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3807     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3808     ++OffloadingEntriesNum;
3809   }
3810 }
3811 
3812 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3813     unsigned DeviceID, unsigned FileID, StringRef ParentName,
3814     unsigned LineNum) const {
3815   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3816   if (PerDevice == OffloadEntriesTargetRegion.end())
3817     return false;
3818   auto PerFile = PerDevice->second.find(FileID);
3819   if (PerFile == PerDevice->second.end())
3820     return false;
3821   auto PerParentName = PerFile->second.find(ParentName);
3822   if (PerParentName == PerFile->second.end())
3823     return false;
3824   auto PerLine = PerParentName->second.find(LineNum);
3825   if (PerLine == PerParentName->second.end())
3826     return false;
3827   // Fail if this entry is already registered.
3828   if (PerLine->second.getAddress() || PerLine->second.getID())
3829     return false;
3830   return true;
3831 }
3832 
3833 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3834     const OffloadTargetRegionEntryInfoActTy &Action) {
3835   // Scan all target region entries and perform the provided action.
3836   for (const auto &D : OffloadEntriesTargetRegion)
3837     for (const auto &F : D.second)
3838       for (const auto &P : F.second)
3839         for (const auto &L : P.second)
3840           Action(D.first, F.first, P.first(), L.first, L.second);
3841 }
3842 
3843 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3844     initializeDeviceGlobalVarEntryInfo(StringRef Name,
3845                                        OMPTargetGlobalVarEntryKind Flags,
3846                                        unsigned Order) {
3847   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3848                                              "only required for the device "
3849                                              "code generation.");
3850   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3851   ++OffloadingEntriesNum;
3852 }
3853 
3854 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3855     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3856                                      CharUnits VarSize,
3857                                      OMPTargetGlobalVarEntryKind Flags,
3858                                      llvm::GlobalValue::LinkageTypes Linkage) {
3859   if (CGM.getLangOpts().OpenMPIsDevice) {
3860     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3861     assert(Entry.isValid() && Entry.getFlags() == Flags &&
3862            "Entry not initialized!");
3863     assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3864            "Resetting with the new address.");
3865     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3866       if (Entry.getVarSize().isZero()) {
3867         Entry.setVarSize(VarSize);
3868         Entry.setLinkage(Linkage);
3869       }
3870       return;
3871     }
3872     Entry.setVarSize(VarSize);
3873     Entry.setLinkage(Linkage);
3874     Entry.setAddress(Addr);
3875   } else {
3876     if (hasDeviceGlobalVarEntryInfo(VarName)) {
3877       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3878       assert(Entry.isValid() && Entry.getFlags() == Flags &&
3879              "Entry not initialized!");
3880       assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3881              "Resetting with the new address.");
3882       if (Entry.getVarSize().isZero()) {
3883         Entry.setVarSize(VarSize);
3884         Entry.setLinkage(Linkage);
3885       }
3886       return;
3887     }
3888     OffloadEntriesDeviceGlobalVar.try_emplace(
3889         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3890     ++OffloadingEntriesNum;
3891   }
3892 }
3893 
3894 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3895     actOnDeviceGlobalVarEntriesInfo(
3896         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3897   // Scan all target region entries and perform the provided action.
3898   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3899     Action(E.getKey(), E.getValue());
3900 }
3901 
3902 llvm::Function *
3903 CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
3904   // If we don't have entries or if we are emitting code for the device, we
3905   // don't need to do anything.
3906   if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
3907     return nullptr;
3908 
3909   llvm::Module &M = CGM.getModule();
3910   ASTContext &C = CGM.getContext();
3911 
3912   // Get list of devices we care about
3913   const std::vector<llvm::Triple> &Devices = CGM.getLangOpts().OMPTargetTriples;
3914 
3915   // We should be creating an offloading descriptor only if there are devices
3916   // specified.
3917   assert(!Devices.empty() && "No OpenMP offloading devices??");
3918 
3919   // Create the external variables that will point to the begin and end of the
3920   // host entries section. These will be defined by the linker.
3921   llvm::Type *OffloadEntryTy =
3922       CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
3923   std::string EntriesBeginName = getName({"omp_offloading", "entries_begin"});
3924   auto *HostEntriesBegin = new llvm::GlobalVariable(
3925       M, OffloadEntryTy, /*isConstant=*/true,
3926       llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
3927       EntriesBeginName);
3928   std::string EntriesEndName = getName({"omp_offloading", "entries_end"});
3929   auto *HostEntriesEnd =
3930       new llvm::GlobalVariable(M, OffloadEntryTy, /*isConstant=*/true,
3931                                llvm::GlobalValue::ExternalLinkage,
3932                                /*Initializer=*/nullptr, EntriesEndName);
3933 
3934   // Create all device images
3935   auto *DeviceImageTy = cast<llvm::StructType>(
3936       CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));
3937   ConstantInitBuilder DeviceImagesBuilder(CGM);
3938   ConstantArrayBuilder DeviceImagesEntries =
3939       DeviceImagesBuilder.beginArray(DeviceImageTy);
3940 
3941   for (const llvm::Triple &Device : Devices) {
3942     StringRef T = Device.getTriple();
3943     std::string BeginName = getName({"omp_offloading", "img_start", ""});
3944     auto *ImgBegin = new llvm::GlobalVariable(
3945         M, CGM.Int8Ty, /*isConstant=*/true,
3946         llvm::GlobalValue::ExternalWeakLinkage,
3947         /*Initializer=*/nullptr, Twine(BeginName).concat(T));
3948     std::string EndName = getName({"omp_offloading", "img_end", ""});
3949     auto *ImgEnd = new llvm::GlobalVariable(
3950         M, CGM.Int8Ty, /*isConstant=*/true,
3951         llvm::GlobalValue::ExternalWeakLinkage,
3952         /*Initializer=*/nullptr, Twine(EndName).concat(T));
3953 
3954     llvm::Constant *Data[] = {ImgBegin, ImgEnd, HostEntriesBegin,
3955                               HostEntriesEnd};
3956     createConstantGlobalStructAndAddToParent(CGM, getTgtDeviceImageQTy(), Data,
3957                                              DeviceImagesEntries);
3958   }
3959 
3960   // Create device images global array.
3961   std::string ImagesName = getName({"omp_offloading", "device_images"});
3962   llvm::GlobalVariable *DeviceImages =
3963       DeviceImagesEntries.finishAndCreateGlobal(ImagesName,
3964                                                 CGM.getPointerAlign(),
3965                                                 /*isConstant=*/true);
3966   DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3967 
3968   // This is a Zero array to be used in the creation of the constant expressions
3969   llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
3970                              llvm::Constant::getNullValue(CGM.Int32Ty)};
3971 
3972   // Create the target region descriptor.
3973   llvm::Constant *Data[] = {
3974       llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()),
3975       llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(),
3976                                            DeviceImages, Index),
3977       HostEntriesBegin, HostEntriesEnd};
3978   std::string Descriptor = getName({"omp_offloading", "descriptor"});
3979   llvm::GlobalVariable *Desc = createGlobalStruct(
3980       CGM, getTgtBinaryDescriptorQTy(), /*IsConstant=*/true, Data, Descriptor);
3981 
3982   // Emit code to register or unregister the descriptor at execution
3983   // startup or closing, respectively.
3984 
3985   llvm::Function *UnRegFn;
3986   {
3987     FunctionArgList Args;
3988     ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other);
3989     Args.push_back(&DummyPtr);
3990 
3991     CodeGenFunction CGF(CGM);
3992     // Disable debug info for global (de-)initializer because they are not part
3993     // of some particular construct.
3994     CGF.disableDebugInfo();
3995     const auto &FI =
3996         CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3997     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
3998     std::string UnregName = getName({"omp_offloading", "descriptor_unreg"});
3999     UnRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, UnregName, FI);
4000     CGF.StartFunction(GlobalDecl(), C.VoidTy, UnRegFn, FI, Args);
4001     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
4002                         Desc);
4003     CGF.FinishFunction();
4004   }
4005   llvm::Function *RegFn;
4006   {
4007     CodeGenFunction CGF(CGM);
4008     // Disable debug info for global (de-)initializer because they are not part
4009     // of some particular construct.
4010     CGF.disableDebugInfo();
4011     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
4012     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
4013 
4014     // Encode offload target triples into the registration function name. It
4015     // will serve as a comdat key for the registration/unregistration code for
4016     // this particular combination of offloading targets.
4017     SmallVector<StringRef, 4U> RegFnNameParts(Devices.size() + 2U);
4018     RegFnNameParts[0] = "omp_offloading";
4019     RegFnNameParts[1] = "descriptor_reg";
4020     llvm::transform(Devices, std::next(RegFnNameParts.begin(), 2),
4021                     [](const llvm::Triple &T) -> const std::string& {
4022                       return T.getTriple();
4023                     });
4024     llvm::sort(std::next(RegFnNameParts.begin(), 2), RegFnNameParts.end());
4025     std::string Descriptor = getName(RegFnNameParts);
4026     RegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, Descriptor, FI);
4027     CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList());
4028     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc);
4029     // Create a variable to drive the registration and unregistration of the
4030     // descriptor, so we can reuse the logic that emits Ctors and Dtors.
4031     ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(),
4032                                   SourceLocation(), nullptr, C.CharTy,
4033                                   ImplicitParamDecl::Other);
4034     CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
4035     CGF.FinishFunction();
4036   }
4037   if (CGM.supportsCOMDAT()) {
4038     // It is sufficient to call registration function only once, so create a
4039     // COMDAT group for registration/unregistration functions and associated
4040     // data. That would reduce startup time and code size. Registration
4041     // function serves as a COMDAT group key.
4042     llvm::Comdat *ComdatKey = M.getOrInsertComdat(RegFn->getName());
4043     RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
4044     RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility);
4045     RegFn->setComdat(ComdatKey);
4046     UnRegFn->setComdat(ComdatKey);
4047     DeviceImages->setComdat(ComdatKey);
4048     Desc->setComdat(ComdatKey);
4049   }
4050   return RegFn;
4051 }
4052 
4053 void CGOpenMPRuntime::createOffloadEntry(
4054     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
4055     llvm::GlobalValue::LinkageTypes Linkage) {
4056   StringRef Name = Addr->getName();
4057   llvm::Module &M = CGM.getModule();
4058   llvm::LLVMContext &C = M.getContext();
4059 
4060   // Create constant string with the name.
4061   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
4062 
4063   std::string StringName = getName({"omp_offloading", "entry_name"});
4064   auto *Str = new llvm::GlobalVariable(
4065       M, StrPtrInit->getType(), /*isConstant=*/true,
4066       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
4067   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
4068 
4069   llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
4070                             llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
4071                             llvm::ConstantInt::get(CGM.SizeTy, Size),
4072                             llvm::ConstantInt::get(CGM.Int32Ty, Flags),
4073                             llvm::ConstantInt::get(CGM.Int32Ty, 0)};
4074   std::string EntryName = getName({"omp_offloading", "entry", ""});
4075   llvm::GlobalVariable *Entry = createGlobalStruct(
4076       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
4077       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
4078 
4079   // The entry has to be created in the section the linker expects it to be.
4080   std::string Section = getName({"omp_offloading", "entries"});
4081   Entry->setSection(Section);
4082 }
4083 
4084 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
4085   // Emit the offloading entries and metadata so that the device codegen side
4086   // can easily figure out what to emit. The produced metadata looks like
4087   // this:
4088   //
4089   // !omp_offload.info = !{!1, ...}
4090   //
4091   // Right now we only generate metadata for function that contain target
4092   // regions.
4093 
4094   // If we do not have entries, we don't need to do anything.
4095   if (OffloadEntriesInfoManager.empty())
4096     return;
4097 
4098   llvm::Module &M = CGM.getModule();
4099   llvm::LLVMContext &C = M.getContext();
4100   SmallVector<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
4101       OrderedEntries(OffloadEntriesInfoManager.size());
4102   llvm::SmallVector<StringRef, 16> ParentFunctions(
4103       OffloadEntriesInfoManager.size());
4104 
4105   // Auxiliary methods to create metadata values and strings.
4106   auto &&GetMDInt = [this](unsigned V) {
4107     return llvm::ConstantAsMetadata::get(
4108         llvm::ConstantInt::get(CGM.Int32Ty, V));
4109   };
4110 
4111   auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
4112 
4113   // Create the offloading info metadata node.
4114   llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
4115 
4116   // Create function that emits metadata for each target region entry;
4117   auto &&TargetRegionMetadataEmitter =
4118       [&C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, &GetMDString](
4119           unsigned DeviceID, unsigned FileID, StringRef ParentName,
4120           unsigned Line,
4121           const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
4122         // Generate metadata for target regions. Each entry of this metadata
4123         // contains:
4124         // - Entry 0 -> Kind of this type of metadata (0).
4125         // - Entry 1 -> Device ID of the file where the entry was identified.
4126         // - Entry 2 -> File ID of the file where the entry was identified.
4127         // - Entry 3 -> Mangled name of the function where the entry was
4128         // identified.
4129         // - Entry 4 -> Line in the file where the entry was identified.
4130         // - Entry 5 -> Order the entry was created.
4131         // The first element of the metadata node is the kind.
4132         llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
4133                                  GetMDInt(FileID),      GetMDString(ParentName),
4134                                  GetMDInt(Line),        GetMDInt(E.getOrder())};
4135 
4136         // Save this entry in the right position of the ordered entries array.
4137         OrderedEntries[E.getOrder()] = &E;
4138         ParentFunctions[E.getOrder()] = ParentName;
4139 
4140         // Add metadata to the named metadata node.
4141         MD->addOperand(llvm::MDNode::get(C, Ops));
4142       };
4143 
4144   OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
4145       TargetRegionMetadataEmitter);
4146 
4147   // Create function that emits metadata for each device global variable entry;
4148   auto &&DeviceGlobalVarMetadataEmitter =
4149       [&C, &OrderedEntries, &GetMDInt, &GetMDString,
4150        MD](StringRef MangledName,
4151            const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
4152                &E) {
4153         // Generate metadata for global variables. Each entry of this metadata
4154         // contains:
4155         // - Entry 0 -> Kind of this type of metadata (1).
4156         // - Entry 1 -> Mangled name of the variable.
4157         // - Entry 2 -> Declare target kind.
4158         // - Entry 3 -> Order the entry was created.
4159         // The first element of the metadata node is the kind.
4160         llvm::Metadata *Ops[] = {
4161             GetMDInt(E.getKind()), GetMDString(MangledName),
4162             GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
4163 
4164         // Save this entry in the right position of the ordered entries array.
4165         OrderedEntries[E.getOrder()] = &E;
4166 
4167         // Add metadata to the named metadata node.
4168         MD->addOperand(llvm::MDNode::get(C, Ops));
4169       };
4170 
4171   OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
4172       DeviceGlobalVarMetadataEmitter);
4173 
4174   for (const auto *E : OrderedEntries) {
4175     assert(E && "All ordered entries must exist!");
4176     if (const auto *CE =
4177             dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
4178                 E)) {
4179       if (!CE->getID() || !CE->getAddress()) {
4180         // Do not blame the entry if the parent funtion is not emitted.
4181         StringRef FnName = ParentFunctions[CE->getOrder()];
4182         if (!CGM.GetGlobalValue(FnName))
4183           continue;
4184         unsigned DiagID = CGM.getDiags().getCustomDiagID(
4185             DiagnosticsEngine::Error,
4186             "Offloading entry for target region is incorrect: either the "
4187             "address or the ID is invalid.");
4188         CGM.getDiags().Report(DiagID);
4189         continue;
4190       }
4191       createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
4192                          CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
4193     } else if (const auto *CE =
4194                    dyn_cast<OffloadEntriesInfoManagerTy::
4195                                 OffloadEntryInfoDeviceGlobalVar>(E)) {
4196       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
4197           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
4198               CE->getFlags());
4199       switch (Flags) {
4200       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
4201         if (CGM.getLangOpts().OpenMPIsDevice &&
4202             CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
4203           continue;
4204         if (!CE->getAddress()) {
4205           unsigned DiagID = CGM.getDiags().getCustomDiagID(
4206               DiagnosticsEngine::Error,
4207               "Offloading entry for declare target variable is incorrect: the "
4208               "address is invalid.");
4209           CGM.getDiags().Report(DiagID);
4210           continue;
4211         }
4212         // The vaiable has no definition - no need to add the entry.
4213         if (CE->getVarSize().isZero())
4214           continue;
4215         break;
4216       }
4217       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
4218         assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
4219                 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
4220                "Declaret target link address is set.");
4221         if (CGM.getLangOpts().OpenMPIsDevice)
4222           continue;
4223         if (!CE->getAddress()) {
4224           unsigned DiagID = CGM.getDiags().getCustomDiagID(
4225               DiagnosticsEngine::Error,
4226               "Offloading entry for declare target variable is incorrect: the "
4227               "address is invalid.");
4228           CGM.getDiags().Report(DiagID);
4229           continue;
4230         }
4231         break;
4232       }
4233       createOffloadEntry(CE->getAddress(), CE->getAddress(),
4234                          CE->getVarSize().getQuantity(), Flags,
4235                          CE->getLinkage());
4236     } else {
4237       llvm_unreachable("Unsupported entry kind.");
4238     }
4239   }
4240 }
4241 
4242 /// Loads all the offload entries information from the host IR
4243 /// metadata.
4244 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
4245   // If we are in target mode, load the metadata from the host IR. This code has
4246   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
4247 
4248   if (!CGM.getLangOpts().OpenMPIsDevice)
4249     return;
4250 
4251   if (CGM.getLangOpts().OMPHostIRFile.empty())
4252     return;
4253 
4254   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
4255   if (auto EC = Buf.getError()) {
4256     CGM.getDiags().Report(diag::err_cannot_open_file)
4257         << CGM.getLangOpts().OMPHostIRFile << EC.message();
4258     return;
4259   }
4260 
4261   llvm::LLVMContext C;
4262   auto ME = expectedToErrorOrAndEmitErrors(
4263       C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
4264 
4265   if (auto EC = ME.getError()) {
4266     unsigned DiagID = CGM.getDiags().getCustomDiagID(
4267         DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
4268     CGM.getDiags().Report(DiagID)
4269         << CGM.getLangOpts().OMPHostIRFile << EC.message();
4270     return;
4271   }
4272 
4273   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
4274   if (!MD)
4275     return;
4276 
4277   for (llvm::MDNode *MN : MD->operands()) {
4278     auto &&GetMDInt = [MN](unsigned Idx) {
4279       auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
4280       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
4281     };
4282 
4283     auto &&GetMDString = [MN](unsigned Idx) {
4284       auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
4285       return V->getString();
4286     };
4287 
4288     switch (GetMDInt(0)) {
4289     default:
4290       llvm_unreachable("Unexpected metadata!");
4291       break;
4292     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
4293         OffloadingEntryInfoTargetRegion:
4294       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
4295           /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
4296           /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
4297           /*Order=*/GetMDInt(5));
4298       break;
4299     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
4300         OffloadingEntryInfoDeviceGlobalVar:
4301       OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
4302           /*MangledName=*/GetMDString(1),
4303           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
4304               /*Flags=*/GetMDInt(2)),
4305           /*Order=*/GetMDInt(3));
4306       break;
4307     }
4308   }
4309 }
4310 
4311 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
4312   if (!KmpRoutineEntryPtrTy) {
4313     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
4314     ASTContext &C = CGM.getContext();
4315     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
4316     FunctionProtoType::ExtProtoInfo EPI;
4317     KmpRoutineEntryPtrQTy = C.getPointerType(
4318         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
4319     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
4320   }
4321 }
4322 
4323 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
4324   // Make sure the type of the entry is already created. This is the type we
4325   // have to create:
4326   // struct __tgt_offload_entry{
4327   //   void      *addr;       // Pointer to the offload entry info.
4328   //                          // (function or global)
4329   //   char      *name;       // Name of the function or global.
4330   //   size_t     size;       // Size of the entry info (0 if it a function).
4331   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
4332   //   int32_t    reserved;   // Reserved, to use by the runtime library.
4333   // };
4334   if (TgtOffloadEntryQTy.isNull()) {
4335     ASTContext &C = CGM.getContext();
4336     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
4337     RD->startDefinition();
4338     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4339     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
4340     addFieldToRecordDecl(C, RD, C.getSizeType());
4341     addFieldToRecordDecl(
4342         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4343     addFieldToRecordDecl(
4344         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4345     RD->completeDefinition();
4346     RD->addAttr(PackedAttr::CreateImplicit(C));
4347     TgtOffloadEntryQTy = C.getRecordType(RD);
4348   }
4349   return TgtOffloadEntryQTy;
4350 }
4351 
4352 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
4353   // These are the types we need to build:
4354   // struct __tgt_device_image{
4355   // void   *ImageStart;       // Pointer to the target code start.
4356   // void   *ImageEnd;         // Pointer to the target code end.
4357   // // We also add the host entries to the device image, as it may be useful
4358   // // for the target runtime to have access to that information.
4359   // __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all
4360   //                                       // the entries.
4361   // __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
4362   //                                       // entries (non inclusive).
4363   // };
4364   if (TgtDeviceImageQTy.isNull()) {
4365     ASTContext &C = CGM.getContext();
4366     RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image");
4367     RD->startDefinition();
4368     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4369     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4370     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4371     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4372     RD->completeDefinition();
4373     TgtDeviceImageQTy = C.getRecordType(RD);
4374   }
4375   return TgtDeviceImageQTy;
4376 }
4377 
4378 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
4379   // struct __tgt_bin_desc{
4380   //   int32_t              NumDevices;      // Number of devices supported.
4381   //   __tgt_device_image   *DeviceImages;   // Arrays of device images
4382   //                                         // (one per device).
4383   //   __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all the
4384   //                                         // entries.
4385   //   __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
4386   //                                         // entries (non inclusive).
4387   // };
4388   if (TgtBinaryDescriptorQTy.isNull()) {
4389     ASTContext &C = CGM.getContext();
4390     RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc");
4391     RD->startDefinition();
4392     addFieldToRecordDecl(
4393         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4394     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
4395     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4396     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4397     RD->completeDefinition();
4398     TgtBinaryDescriptorQTy = C.getRecordType(RD);
4399   }
4400   return TgtBinaryDescriptorQTy;
4401 }
4402 
4403 namespace {
4404 struct PrivateHelpersTy {
4405   PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
4406                    const VarDecl *PrivateElemInit)
4407       : Original(Original), PrivateCopy(PrivateCopy),
4408         PrivateElemInit(PrivateElemInit) {}
4409   const VarDecl *Original;
4410   const VarDecl *PrivateCopy;
4411   const VarDecl *PrivateElemInit;
4412 };
4413 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
4414 } // anonymous namespace
4415 
4416 static RecordDecl *
4417 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
4418   if (!Privates.empty()) {
4419     ASTContext &C = CGM.getContext();
4420     // Build struct .kmp_privates_t. {
4421     //         /*  private vars  */
4422     //       };
4423     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
4424     RD->startDefinition();
4425     for (const auto &Pair : Privates) {
4426       const VarDecl *VD = Pair.second.Original;
4427       QualType Type = VD->getType().getNonReferenceType();
4428       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
4429       if (VD->hasAttrs()) {
4430         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
4431              E(VD->getAttrs().end());
4432              I != E; ++I)
4433           FD->addAttr(*I);
4434       }
4435     }
4436     RD->completeDefinition();
4437     return RD;
4438   }
4439   return nullptr;
4440 }
4441 
4442 static RecordDecl *
4443 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
4444                          QualType KmpInt32Ty,
4445                          QualType KmpRoutineEntryPointerQTy) {
4446   ASTContext &C = CGM.getContext();
4447   // Build struct kmp_task_t {
4448   //         void *              shareds;
4449   //         kmp_routine_entry_t routine;
4450   //         kmp_int32           part_id;
4451   //         kmp_cmplrdata_t data1;
4452   //         kmp_cmplrdata_t data2;
4453   // For taskloops additional fields:
4454   //         kmp_uint64          lb;
4455   //         kmp_uint64          ub;
4456   //         kmp_int64           st;
4457   //         kmp_int32           liter;
4458   //         void *              reductions;
4459   //       };
4460   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
4461   UD->startDefinition();
4462   addFieldToRecordDecl(C, UD, KmpInt32Ty);
4463   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
4464   UD->completeDefinition();
4465   QualType KmpCmplrdataTy = C.getRecordType(UD);
4466   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
4467   RD->startDefinition();
4468   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4469   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
4470   addFieldToRecordDecl(C, RD, KmpInt32Ty);
4471   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4472   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4473   if (isOpenMPTaskLoopDirective(Kind)) {
4474     QualType KmpUInt64Ty =
4475         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
4476     QualType KmpInt64Ty =
4477         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
4478     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4479     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4480     addFieldToRecordDecl(C, RD, KmpInt64Ty);
4481     addFieldToRecordDecl(C, RD, KmpInt32Ty);
4482     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4483   }
4484   RD->completeDefinition();
4485   return RD;
4486 }
4487 
4488 static RecordDecl *
4489 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
4490                                      ArrayRef<PrivateDataTy> Privates) {
4491   ASTContext &C = CGM.getContext();
4492   // Build struct kmp_task_t_with_privates {
4493   //         kmp_task_t task_data;
4494   //         .kmp_privates_t. privates;
4495   //       };
4496   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
4497   RD->startDefinition();
4498   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
4499   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
4500     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
4501   RD->completeDefinition();
4502   return RD;
4503 }
4504 
4505 /// Emit a proxy function which accepts kmp_task_t as the second
4506 /// argument.
4507 /// \code
4508 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
4509 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
4510 ///   For taskloops:
4511 ///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
4512 ///   tt->reductions, tt->shareds);
4513 ///   return 0;
4514 /// }
4515 /// \endcode
4516 static llvm::Function *
4517 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
4518                       OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
4519                       QualType KmpTaskTWithPrivatesPtrQTy,
4520                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
4521                       QualType SharedsPtrTy, llvm::Function *TaskFunction,
4522                       llvm::Value *TaskPrivatesMap) {
4523   ASTContext &C = CGM.getContext();
4524   FunctionArgList Args;
4525   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
4526                             ImplicitParamDecl::Other);
4527   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4528                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
4529                                 ImplicitParamDecl::Other);
4530   Args.push_back(&GtidArg);
4531   Args.push_back(&TaskTypeArg);
4532   const auto &TaskEntryFnInfo =
4533       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
4534   llvm::FunctionType *TaskEntryTy =
4535       CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
4536   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
4537   auto *TaskEntry = llvm::Function::Create(
4538       TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4539   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
4540   TaskEntry->setDoesNotRecurse();
4541   CodeGenFunction CGF(CGM);
4542   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
4543                     Loc, Loc);
4544 
4545   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
4546   // tt,
4547   // For taskloops:
4548   // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
4549   // tt->task_data.shareds);
4550   llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
4551       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
4552   LValue TDBase = CGF.EmitLoadOfPointerLValue(
4553       CGF.GetAddrOfLocalVar(&TaskTypeArg),
4554       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4555   const auto *KmpTaskTWithPrivatesQTyRD =
4556       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
4557   LValue Base =
4558       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4559   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
4560   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4561   LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
4562   llvm::Value *PartidParam = PartIdLVal.getPointer();
4563 
4564   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
4565   LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
4566   llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4567       CGF.EmitLoadOfScalar(SharedsLVal, Loc),
4568       CGF.ConvertTypeForMem(SharedsPtrTy));
4569 
4570   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4571   llvm::Value *PrivatesParam;
4572   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
4573     LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
4574     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4575         PrivatesLVal.getPointer(), CGF.VoidPtrTy);
4576   } else {
4577     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4578   }
4579 
4580   llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
4581                                TaskPrivatesMap,
4582                                CGF.Builder
4583                                    .CreatePointerBitCastOrAddrSpaceCast(
4584                                        TDBase.getAddress(), CGF.VoidPtrTy)
4585                                    .getPointer()};
4586   SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
4587                                           std::end(CommonArgs));
4588   if (isOpenMPTaskLoopDirective(Kind)) {
4589     auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
4590     LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
4591     llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
4592     auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
4593     LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
4594     llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
4595     auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
4596     LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
4597     llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
4598     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4599     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4600     llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
4601     auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
4602     LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
4603     llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
4604     CallArgs.push_back(LBParam);
4605     CallArgs.push_back(UBParam);
4606     CallArgs.push_back(StParam);
4607     CallArgs.push_back(LIParam);
4608     CallArgs.push_back(RParam);
4609   }
4610   CallArgs.push_back(SharedsParam);
4611 
4612   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
4613                                                   CallArgs);
4614   CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
4615                              CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
4616   CGF.FinishFunction();
4617   return TaskEntry;
4618 }
4619 
4620 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
4621                                             SourceLocation Loc,
4622                                             QualType KmpInt32Ty,
4623                                             QualType KmpTaskTWithPrivatesPtrQTy,
4624                                             QualType KmpTaskTWithPrivatesQTy) {
4625   ASTContext &C = CGM.getContext();
4626   FunctionArgList Args;
4627   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
4628                             ImplicitParamDecl::Other);
4629   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4630                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
4631                                 ImplicitParamDecl::Other);
4632   Args.push_back(&GtidArg);
4633   Args.push_back(&TaskTypeArg);
4634   const auto &DestructorFnInfo =
4635       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
4636   llvm::FunctionType *DestructorFnTy =
4637       CGM.getTypes().GetFunctionType(DestructorFnInfo);
4638   std::string Name =
4639       CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
4640   auto *DestructorFn =
4641       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
4642                              Name, &CGM.getModule());
4643   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
4644                                     DestructorFnInfo);
4645   DestructorFn->setDoesNotRecurse();
4646   CodeGenFunction CGF(CGM);
4647   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
4648                     Args, Loc, Loc);
4649 
4650   LValue Base = CGF.EmitLoadOfPointerLValue(
4651       CGF.GetAddrOfLocalVar(&TaskTypeArg),
4652       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4653   const auto *KmpTaskTWithPrivatesQTyRD =
4654       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
4655   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4656   Base = CGF.EmitLValueForField(Base, *FI);
4657   for (const auto *Field :
4658        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
4659     if (QualType::DestructionKind DtorKind =
4660             Field->getType().isDestructedType()) {
4661       LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
4662       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
4663     }
4664   }
4665   CGF.FinishFunction();
4666   return DestructorFn;
4667 }
4668 
4669 /// Emit a privates mapping function for correct handling of private and
4670 /// firstprivate variables.
4671 /// \code
4672 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
4673 /// **noalias priv1,...,  <tyn> **noalias privn) {
4674 ///   *priv1 = &.privates.priv1;
4675 ///   ...;
4676 ///   *privn = &.privates.privn;
4677 /// }
4678 /// \endcode
4679 static llvm::Value *
4680 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
4681                                ArrayRef<const Expr *> PrivateVars,
4682                                ArrayRef<const Expr *> FirstprivateVars,
4683                                ArrayRef<const Expr *> LastprivateVars,
4684                                QualType PrivatesQTy,
4685                                ArrayRef<PrivateDataTy> Privates) {
4686   ASTContext &C = CGM.getContext();
4687   FunctionArgList Args;
4688   ImplicitParamDecl TaskPrivatesArg(
4689       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4690       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
4691       ImplicitParamDecl::Other);
4692   Args.push_back(&TaskPrivatesArg);
4693   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
4694   unsigned Counter = 1;
4695   for (const Expr *E : PrivateVars) {
4696     Args.push_back(ImplicitParamDecl::Create(
4697         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4698         C.getPointerType(C.getPointerType(E->getType()))
4699             .withConst()
4700             .withRestrict(),
4701         ImplicitParamDecl::Other));
4702     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4703     PrivateVarsPos[VD] = Counter;
4704     ++Counter;
4705   }
4706   for (const Expr *E : FirstprivateVars) {
4707     Args.push_back(ImplicitParamDecl::Create(
4708         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4709         C.getPointerType(C.getPointerType(E->getType()))
4710             .withConst()
4711             .withRestrict(),
4712         ImplicitParamDecl::Other));
4713     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4714     PrivateVarsPos[VD] = Counter;
4715     ++Counter;
4716   }
4717   for (const Expr *E : LastprivateVars) {
4718     Args.push_back(ImplicitParamDecl::Create(
4719         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4720         C.getPointerType(C.getPointerType(E->getType()))
4721             .withConst()
4722             .withRestrict(),
4723         ImplicitParamDecl::Other));
4724     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4725     PrivateVarsPos[VD] = Counter;
4726     ++Counter;
4727   }
4728   const auto &TaskPrivatesMapFnInfo =
4729       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4730   llvm::FunctionType *TaskPrivatesMapTy =
4731       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
4732   std::string Name =
4733       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
4734   auto *TaskPrivatesMap = llvm::Function::Create(
4735       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
4736       &CGM.getModule());
4737   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
4738                                     TaskPrivatesMapFnInfo);
4739   if (CGM.getLangOpts().Optimize) {
4740     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
4741     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
4742     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
4743   }
4744   CodeGenFunction CGF(CGM);
4745   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
4746                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
4747 
4748   // *privi = &.privates.privi;
4749   LValue Base = CGF.EmitLoadOfPointerLValue(
4750       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
4751       TaskPrivatesArg.getType()->castAs<PointerType>());
4752   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
4753   Counter = 0;
4754   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
4755     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
4756     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
4757     LValue RefLVal =
4758         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
4759     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
4760         RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
4761     CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
4762     ++Counter;
4763   }
4764   CGF.FinishFunction();
4765   return TaskPrivatesMap;
4766 }
4767 
4768 /// Emit initialization for private variables in task-based directives.
4769 static void emitPrivatesInit(CodeGenFunction &CGF,
4770                              const OMPExecutableDirective &D,
4771                              Address KmpTaskSharedsPtr, LValue TDBase,
4772                              const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4773                              QualType SharedsTy, QualType SharedsPtrTy,
4774                              const OMPTaskDataTy &Data,
4775                              ArrayRef<PrivateDataTy> Privates, bool ForDup) {
4776   ASTContext &C = CGF.getContext();
4777   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4778   LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
4779   OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
4780                                  ? OMPD_taskloop
4781                                  : OMPD_task;
4782   const CapturedStmt &CS = *D.getCapturedStmt(Kind);
4783   CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
4784   LValue SrcBase;
4785   bool IsTargetTask =
4786       isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
4787       isOpenMPTargetExecutionDirective(D.getDirectiveKind());
4788   // For target-based directives skip 3 firstprivate arrays BasePointersArray,
4789   // PointersArray and SizesArray. The original variables for these arrays are
4790   // not captured and we get their addresses explicitly.
4791   if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
4792       (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
4793     SrcBase = CGF.MakeAddrLValue(
4794         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4795             KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
4796         SharedsTy);
4797   }
4798   FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
4799   for (const PrivateDataTy &Pair : Privates) {
4800     const VarDecl *VD = Pair.second.PrivateCopy;
4801     const Expr *Init = VD->getAnyInitializer();
4802     if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
4803                              !CGF.isTrivialInitializer(Init)))) {
4804       LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
4805       if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
4806         const VarDecl *OriginalVD = Pair.second.Original;
4807         // Check if the variable is the target-based BasePointersArray,
4808         // PointersArray or SizesArray.
4809         LValue SharedRefLValue;
4810         QualType Type = PrivateLValue.getType();
4811         const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
4812         if (IsTargetTask && !SharedField) {
4813           assert(isa<ImplicitParamDecl>(OriginalVD) &&
4814                  isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
4815                  cast<CapturedDecl>(OriginalVD->getDeclContext())
4816                          ->getNumParams() == 0 &&
4817                  isa<TranslationUnitDecl>(
4818                      cast<CapturedDecl>(OriginalVD->getDeclContext())
4819                          ->getDeclContext()) &&
4820                  "Expected artificial target data variable.");
4821           SharedRefLValue =
4822               CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
4823         } else {
4824           SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
4825           SharedRefLValue = CGF.MakeAddrLValue(
4826               Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
4827               SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
4828               SharedRefLValue.getTBAAInfo());
4829         }
4830         if (Type->isArrayType()) {
4831           // Initialize firstprivate array.
4832           if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
4833             // Perform simple memcpy.
4834             CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
4835           } else {
4836             // Initialize firstprivate array using element-by-element
4837             // initialization.
4838             CGF.EmitOMPAggregateAssign(
4839                 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
4840                 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
4841                                                   Address SrcElement) {
4842                   // Clean up any temporaries needed by the initialization.
4843                   CodeGenFunction::OMPPrivateScope InitScope(CGF);
4844                   InitScope.addPrivate(
4845                       Elem, [SrcElement]() -> Address { return SrcElement; });
4846                   (void)InitScope.Privatize();
4847                   // Emit initialization for single element.
4848                   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
4849                       CGF, &CapturesInfo);
4850                   CGF.EmitAnyExprToMem(Init, DestElement,
4851                                        Init->getType().getQualifiers(),
4852                                        /*IsInitializer=*/false);
4853                 });
4854           }
4855         } else {
4856           CodeGenFunction::OMPPrivateScope InitScope(CGF);
4857           InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
4858             return SharedRefLValue.getAddress();
4859           });
4860           (void)InitScope.Privatize();
4861           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
4862           CGF.EmitExprAsInit(Init, VD, PrivateLValue,
4863                              /*capturedByInit=*/false);
4864         }
4865       } else {
4866         CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
4867       }
4868     }
4869     ++FI;
4870   }
4871 }
4872 
4873 /// Check if duplication function is required for taskloops.
4874 static bool checkInitIsRequired(CodeGenFunction &CGF,
4875                                 ArrayRef<PrivateDataTy> Privates) {
4876   bool InitRequired = false;
4877   for (const PrivateDataTy &Pair : Privates) {
4878     const VarDecl *VD = Pair.second.PrivateCopy;
4879     const Expr *Init = VD->getAnyInitializer();
4880     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
4881                                     !CGF.isTrivialInitializer(Init));
4882     if (InitRequired)
4883       break;
4884   }
4885   return InitRequired;
4886 }
4887 
4888 
4889 /// Emit task_dup function (for initialization of
4890 /// private/firstprivate/lastprivate vars and last_iter flag)
4891 /// \code
4892 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
4893 /// lastpriv) {
4894 /// // setup lastprivate flag
4895 ///    task_dst->last = lastpriv;
4896 /// // could be constructor calls here...
4897 /// }
4898 /// \endcode
4899 static llvm::Value *
4900 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
4901                     const OMPExecutableDirective &D,
4902                     QualType KmpTaskTWithPrivatesPtrQTy,
4903                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4904                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
4905                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
4906                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
4907   ASTContext &C = CGM.getContext();
4908   FunctionArgList Args;
4909   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4910                            KmpTaskTWithPrivatesPtrQTy,
4911                            ImplicitParamDecl::Other);
4912   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4913                            KmpTaskTWithPrivatesPtrQTy,
4914                            ImplicitParamDecl::Other);
4915   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
4916                                 ImplicitParamDecl::Other);
4917   Args.push_back(&DstArg);
4918   Args.push_back(&SrcArg);
4919   Args.push_back(&LastprivArg);
4920   const auto &TaskDupFnInfo =
4921       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4922   llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
4923   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
4924   auto *TaskDup = llvm::Function::Create(
4925       TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4926   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
4927   TaskDup->setDoesNotRecurse();
4928   CodeGenFunction CGF(CGM);
4929   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
4930                     Loc);
4931 
4932   LValue TDBase = CGF.EmitLoadOfPointerLValue(
4933       CGF.GetAddrOfLocalVar(&DstArg),
4934       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4935   // task_dst->liter = lastpriv;
4936   if (WithLastIter) {
4937     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4938     LValue Base = CGF.EmitLValueForField(
4939         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4940     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4941     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
4942         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
4943     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
4944   }
4945 
4946   // Emit initial values for private copies (if any).
4947   assert(!Privates.empty());
4948   Address KmpTaskSharedsPtr = Address::invalid();
4949   if (!Data.FirstprivateVars.empty()) {
4950     LValue TDBase = CGF.EmitLoadOfPointerLValue(
4951         CGF.GetAddrOfLocalVar(&SrcArg),
4952         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4953     LValue Base = CGF.EmitLValueForField(
4954         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4955     KmpTaskSharedsPtr = Address(
4956         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
4957                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
4958                                                   KmpTaskTShareds)),
4959                              Loc),
4960         CGF.getNaturalTypeAlignment(SharedsTy));
4961   }
4962   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
4963                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
4964   CGF.FinishFunction();
4965   return TaskDup;
4966 }
4967 
4968 /// Checks if destructor function is required to be generated.
4969 /// \return true if cleanups are required, false otherwise.
4970 static bool
4971 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
4972   bool NeedsCleanup = false;
4973   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4974   const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
4975   for (const FieldDecl *FD : PrivateRD->fields()) {
4976     NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
4977     if (NeedsCleanup)
4978       break;
4979   }
4980   return NeedsCleanup;
4981 }
4982 
4983 CGOpenMPRuntime::TaskResultTy
4984 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
4985                               const OMPExecutableDirective &D,
4986                               llvm::Function *TaskFunction, QualType SharedsTy,
4987                               Address Shareds, const OMPTaskDataTy &Data) {
4988   ASTContext &C = CGM.getContext();
4989   llvm::SmallVector<PrivateDataTy, 4> Privates;
4990   // Aggregate privates and sort them by the alignment.
4991   auto I = Data.PrivateCopies.begin();
4992   for (const Expr *E : Data.PrivateVars) {
4993     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4994     Privates.emplace_back(
4995         C.getDeclAlign(VD),
4996         PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4997                          /*PrivateElemInit=*/nullptr));
4998     ++I;
4999   }
5000   I = Data.FirstprivateCopies.begin();
5001   auto IElemInitRef = Data.FirstprivateInits.begin();
5002   for (const Expr *E : Data.FirstprivateVars) {
5003     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
5004     Privates.emplace_back(
5005         C.getDeclAlign(VD),
5006         PrivateHelpersTy(
5007             VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
5008             cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
5009     ++I;
5010     ++IElemInitRef;
5011   }
5012   I = Data.LastprivateCopies.begin();
5013   for (const Expr *E : Data.LastprivateVars) {
5014     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
5015     Privates.emplace_back(
5016         C.getDeclAlign(VD),
5017         PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
5018                          /*PrivateElemInit=*/nullptr));
5019     ++I;
5020   }
5021   llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
5022     return L.first > R.first;
5023   });
5024   QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
5025   // Build type kmp_routine_entry_t (if not built yet).
5026   emitKmpRoutineEntryT(KmpInt32Ty);
5027   // Build type kmp_task_t (if not built yet).
5028   if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
5029     if (SavedKmpTaskloopTQTy.isNull()) {
5030       SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
5031           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
5032     }
5033     KmpTaskTQTy = SavedKmpTaskloopTQTy;
5034   } else {
5035     assert((D.getDirectiveKind() == OMPD_task ||
5036             isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
5037             isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
5038            "Expected taskloop, task or target directive");
5039     if (SavedKmpTaskTQTy.isNull()) {
5040       SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
5041           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
5042     }
5043     KmpTaskTQTy = SavedKmpTaskTQTy;
5044   }
5045   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
5046   // Build particular struct kmp_task_t for the given task.
5047   const RecordDecl *KmpTaskTWithPrivatesQTyRD =
5048       createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
5049   QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
5050   QualType KmpTaskTWithPrivatesPtrQTy =
5051       C.getPointerType(KmpTaskTWithPrivatesQTy);
5052   llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
5053   llvm::Type *KmpTaskTWithPrivatesPtrTy =
5054       KmpTaskTWithPrivatesTy->getPointerTo();
5055   llvm::Value *KmpTaskTWithPrivatesTySize =
5056       CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
5057   QualType SharedsPtrTy = C.getPointerType(SharedsTy);
5058 
5059   // Emit initial values for private copies (if any).
5060   llvm::Value *TaskPrivatesMap = nullptr;
5061   llvm::Type *TaskPrivatesMapTy =
5062       std::next(TaskFunction->arg_begin(), 3)->getType();
5063   if (!Privates.empty()) {
5064     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
5065     TaskPrivatesMap = emitTaskPrivateMappingFunction(
5066         CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
5067         FI->getType(), Privates);
5068     TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5069         TaskPrivatesMap, TaskPrivatesMapTy);
5070   } else {
5071     TaskPrivatesMap = llvm::ConstantPointerNull::get(
5072         cast<llvm::PointerType>(TaskPrivatesMapTy));
5073   }
5074   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
5075   // kmp_task_t *tt);
5076   llvm::Function *TaskEntry = emitProxyTaskFunction(
5077       CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
5078       KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
5079       TaskPrivatesMap);
5080 
5081   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
5082   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
5083   // kmp_routine_entry_t *task_entry);
5084   // Task flags. Format is taken from
5085   // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
5086   // description of kmp_tasking_flags struct.
5087   enum {
5088     TiedFlag = 0x1,
5089     FinalFlag = 0x2,
5090     DestructorsFlag = 0x8,
5091     PriorityFlag = 0x20
5092   };
5093   unsigned Flags = Data.Tied ? TiedFlag : 0;
5094   bool NeedsCleanup = false;
5095   if (!Privates.empty()) {
5096     NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
5097     if (NeedsCleanup)
5098       Flags = Flags | DestructorsFlag;
5099   }
5100   if (Data.Priority.getInt())
5101     Flags = Flags | PriorityFlag;
5102   llvm::Value *TaskFlags =
5103       Data.Final.getPointer()
5104           ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
5105                                      CGF.Builder.getInt32(FinalFlag),
5106                                      CGF.Builder.getInt32(/*C=*/0))
5107           : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
5108   TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
5109   llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
5110   SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
5111       getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
5112       SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5113           TaskEntry, KmpRoutineEntryPtrTy)};
5114   llvm::Value *NewTask;
5115   if (D.hasClausesOfKind<OMPNowaitClause>()) {
5116     // Check if we have any device clause associated with the directive.
5117     const Expr *Device = nullptr;
5118     if (auto *C = D.getSingleClause<OMPDeviceClause>())
5119       Device = C->getDevice();
5120     // Emit device ID if any otherwise use default value.
5121     llvm::Value *DeviceID;
5122     if (Device)
5123       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
5124                                            CGF.Int64Ty, /*isSigned=*/true);
5125     else
5126       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
5127     AllocArgs.push_back(DeviceID);
5128     NewTask = CGF.EmitRuntimeCall(
5129       createRuntimeFunction(OMPRTL__kmpc_omp_target_task_alloc), AllocArgs);
5130   } else {
5131     NewTask = CGF.EmitRuntimeCall(
5132       createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
5133   }
5134   llvm::Value *NewTaskNewTaskTTy =
5135       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5136           NewTask, KmpTaskTWithPrivatesPtrTy);
5137   LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
5138                                                KmpTaskTWithPrivatesQTy);
5139   LValue TDBase =
5140       CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
5141   // Fill the data in the resulting kmp_task_t record.
5142   // Copy shareds if there are any.
5143   Address KmpTaskSharedsPtr = Address::invalid();
5144   if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
5145     KmpTaskSharedsPtr =
5146         Address(CGF.EmitLoadOfScalar(
5147                     CGF.EmitLValueForField(
5148                         TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
5149                                            KmpTaskTShareds)),
5150                     Loc),
5151                 CGF.getNaturalTypeAlignment(SharedsTy));
5152     LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
5153     LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
5154     CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
5155   }
5156   // Emit initial values for private copies (if any).
5157   TaskResultTy Result;
5158   if (!Privates.empty()) {
5159     emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
5160                      SharedsTy, SharedsPtrTy, Data, Privates,
5161                      /*ForDup=*/false);
5162     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
5163         (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
5164       Result.TaskDupFn = emitTaskDupFunction(
5165           CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
5166           KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
5167           /*WithLastIter=*/!Data.LastprivateVars.empty());
5168     }
5169   }
5170   // Fields of union "kmp_cmplrdata_t" for destructors and priority.
5171   enum { Priority = 0, Destructors = 1 };
5172   // Provide pointer to function with destructors for privates.
5173   auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
5174   const RecordDecl *KmpCmplrdataUD =
5175       (*FI)->getType()->getAsUnionType()->getDecl();
5176   if (NeedsCleanup) {
5177     llvm::Value *DestructorFn = emitDestructorsFunction(
5178         CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
5179         KmpTaskTWithPrivatesQTy);
5180     LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
5181     LValue DestructorsLV = CGF.EmitLValueForField(
5182         Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
5183     CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5184                               DestructorFn, KmpRoutineEntryPtrTy),
5185                           DestructorsLV);
5186   }
5187   // Set priority.
5188   if (Data.Priority.getInt()) {
5189     LValue Data2LV = CGF.EmitLValueForField(
5190         TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
5191     LValue PriorityLV = CGF.EmitLValueForField(
5192         Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
5193     CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
5194   }
5195   Result.NewTask = NewTask;
5196   Result.TaskEntry = TaskEntry;
5197   Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
5198   Result.TDBase = TDBase;
5199   Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
5200   return Result;
5201 }
5202 
5203 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
5204                                    const OMPExecutableDirective &D,
5205                                    llvm::Function *TaskFunction,
5206                                    QualType SharedsTy, Address Shareds,
5207                                    const Expr *IfCond,
5208                                    const OMPTaskDataTy &Data) {
5209   if (!CGF.HaveInsertPoint())
5210     return;
5211 
5212   TaskResultTy Result =
5213       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5214   llvm::Value *NewTask = Result.NewTask;
5215   llvm::Function *TaskEntry = Result.TaskEntry;
5216   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
5217   LValue TDBase = Result.TDBase;
5218   const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
5219   ASTContext &C = CGM.getContext();
5220   // Process list of dependences.
5221   Address DependenciesArray = Address::invalid();
5222   unsigned NumDependencies = Data.Dependences.size();
5223   if (NumDependencies) {
5224     // Dependence kind for RTL.
5225     enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3, DepMutexInOutSet = 0x4 };
5226     enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
5227     RecordDecl *KmpDependInfoRD;
5228     QualType FlagsTy =
5229         C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
5230     llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5231     if (KmpDependInfoTy.isNull()) {
5232       KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
5233       KmpDependInfoRD->startDefinition();
5234       addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
5235       addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
5236       addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
5237       KmpDependInfoRD->completeDefinition();
5238       KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
5239     } else {
5240       KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5241     }
5242     // Define type kmp_depend_info[<Dependences.size()>];
5243     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
5244         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
5245         ArrayType::Normal, /*IndexTypeQuals=*/0);
5246     // kmp_depend_info[<Dependences.size()>] deps;
5247     DependenciesArray =
5248         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
5249     for (unsigned I = 0; I < NumDependencies; ++I) {
5250       const Expr *E = Data.Dependences[I].second;
5251       LValue Addr = CGF.EmitLValue(E);
5252       llvm::Value *Size;
5253       QualType Ty = E->getType();
5254       if (const auto *ASE =
5255               dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
5256         LValue UpAddrLVal =
5257             CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
5258         llvm::Value *UpAddr =
5259             CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
5260         llvm::Value *LowIntPtr =
5261             CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
5262         llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
5263         Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
5264       } else {
5265         Size = CGF.getTypeSize(Ty);
5266       }
5267       LValue Base = CGF.MakeAddrLValue(
5268           CGF.Builder.CreateConstArrayGEP(DependenciesArray, I),
5269           KmpDependInfoTy);
5270       // deps[i].base_addr = &<Dependences[i].second>;
5271       LValue BaseAddrLVal = CGF.EmitLValueForField(
5272           Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
5273       CGF.EmitStoreOfScalar(
5274           CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
5275           BaseAddrLVal);
5276       // deps[i].len = sizeof(<Dependences[i].second>);
5277       LValue LenLVal = CGF.EmitLValueForField(
5278           Base, *std::next(KmpDependInfoRD->field_begin(), Len));
5279       CGF.EmitStoreOfScalar(Size, LenLVal);
5280       // deps[i].flags = <Dependences[i].first>;
5281       RTLDependenceKindTy DepKind;
5282       switch (Data.Dependences[I].first) {
5283       case OMPC_DEPEND_in:
5284         DepKind = DepIn;
5285         break;
5286       // Out and InOut dependencies must use the same code.
5287       case OMPC_DEPEND_out:
5288       case OMPC_DEPEND_inout:
5289         DepKind = DepInOut;
5290         break;
5291       case OMPC_DEPEND_mutexinoutset:
5292         DepKind = DepMutexInOutSet;
5293         break;
5294       case OMPC_DEPEND_source:
5295       case OMPC_DEPEND_sink:
5296       case OMPC_DEPEND_unknown:
5297         llvm_unreachable("Unknown task dependence type");
5298       }
5299       LValue FlagsLVal = CGF.EmitLValueForField(
5300           Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5301       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5302                             FlagsLVal);
5303     }
5304     DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5305         CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), CGF.VoidPtrTy);
5306   }
5307 
5308   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5309   // libcall.
5310   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5311   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5312   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
5313   // list is not empty
5314   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5315   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5316   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5317   llvm::Value *DepTaskArgs[7];
5318   if (NumDependencies) {
5319     DepTaskArgs[0] = UpLoc;
5320     DepTaskArgs[1] = ThreadID;
5321     DepTaskArgs[2] = NewTask;
5322     DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
5323     DepTaskArgs[4] = DependenciesArray.getPointer();
5324     DepTaskArgs[5] = CGF.Builder.getInt32(0);
5325     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5326   }
5327   auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
5328                         &TaskArgs,
5329                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5330     if (!Data.Tied) {
5331       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5332       LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5333       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5334     }
5335     if (NumDependencies) {
5336       CGF.EmitRuntimeCall(
5337           createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
5338     } else {
5339       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
5340                           TaskArgs);
5341     }
5342     // Check if parent region is untied and build return for untied task;
5343     if (auto *Region =
5344             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5345       Region->emitUntiedSwitch(CGF);
5346   };
5347 
5348   llvm::Value *DepWaitTaskArgs[6];
5349   if (NumDependencies) {
5350     DepWaitTaskArgs[0] = UpLoc;
5351     DepWaitTaskArgs[1] = ThreadID;
5352     DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
5353     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5354     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5355     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5356   }
5357   auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
5358                         NumDependencies, &DepWaitTaskArgs,
5359                         Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5360     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5361     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5362     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5363     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5364     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5365     // is specified.
5366     if (NumDependencies)
5367       CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
5368                           DepWaitTaskArgs);
5369     // Call proxy_task_entry(gtid, new_task);
5370     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
5371                       Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
5372       Action.Enter(CGF);
5373       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
5374       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
5375                                                           OutlinedFnArgs);
5376     };
5377 
5378     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
5379     // kmp_task_t *new_task);
5380     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
5381     // kmp_task_t *new_task);
5382     RegionCodeGenTy RCG(CodeGen);
5383     CommonActionTy Action(
5384         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
5385         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
5386     RCG.setAction(Action);
5387     RCG(CGF);
5388   };
5389 
5390   if (IfCond) {
5391     emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
5392   } else {
5393     RegionCodeGenTy ThenRCG(ThenCodeGen);
5394     ThenRCG(CGF);
5395   }
5396 }
5397 
5398 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
5399                                        const OMPLoopDirective &D,
5400                                        llvm::Function *TaskFunction,
5401                                        QualType SharedsTy, Address Shareds,
5402                                        const Expr *IfCond,
5403                                        const OMPTaskDataTy &Data) {
5404   if (!CGF.HaveInsertPoint())
5405     return;
5406   TaskResultTy Result =
5407       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5408   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5409   // libcall.
5410   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
5411   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
5412   // sched, kmp_uint64 grainsize, void *task_dup);
5413   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5414   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5415   llvm::Value *IfVal;
5416   if (IfCond) {
5417     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
5418                                       /*isSigned=*/true);
5419   } else {
5420     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
5421   }
5422 
5423   LValue LBLVal = CGF.EmitLValueForField(
5424       Result.TDBase,
5425       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
5426   const auto *LBVar =
5427       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
5428   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
5429                        /*IsInitializer=*/true);
5430   LValue UBLVal = CGF.EmitLValueForField(
5431       Result.TDBase,
5432       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
5433   const auto *UBVar =
5434       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
5435   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
5436                        /*IsInitializer=*/true);
5437   LValue StLVal = CGF.EmitLValueForField(
5438       Result.TDBase,
5439       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
5440   const auto *StVar =
5441       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
5442   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
5443                        /*IsInitializer=*/true);
5444   // Store reductions address.
5445   LValue RedLVal = CGF.EmitLValueForField(
5446       Result.TDBase,
5447       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5448   if (Data.Reductions) {
5449     CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5450   } else {
5451     CGF.EmitNullInitialization(RedLVal.getAddress(),
5452                                CGF.getContext().VoidPtrTy);
5453   }
5454   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
5455   llvm::Value *TaskArgs[] = {
5456       UpLoc,
5457       ThreadID,
5458       Result.NewTask,
5459       IfVal,
5460       LBLVal.getPointer(),
5461       UBLVal.getPointer(),
5462       CGF.EmitLoadOfScalar(StLVal, Loc),
5463       llvm::ConstantInt::getSigned(
5464               CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
5465       llvm::ConstantInt::getSigned(
5466           CGF.IntTy, Data.Schedule.getPointer()
5467                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
5468                          : NoSchedule),
5469       Data.Schedule.getPointer()
5470           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
5471                                       /*isSigned=*/false)
5472           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
5473       Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5474                              Result.TaskDupFn, CGF.VoidPtrTy)
5475                        : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
5476   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
5477 }
5478 
5479 /// Emit reduction operation for each element of array (required for
5480 /// array sections) LHS op = RHS.
5481 /// \param Type Type of array.
5482 /// \param LHSVar Variable on the left side of the reduction operation
5483 /// (references element of array in original variable).
5484 /// \param RHSVar Variable on the right side of the reduction operation
5485 /// (references element of array in original variable).
5486 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5487 /// RHSVar.
5488 static void EmitOMPAggregateReduction(
5489     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
5490     const VarDecl *RHSVar,
5491     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
5492                                   const Expr *, const Expr *)> &RedOpGen,
5493     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
5494     const Expr *UpExpr = nullptr) {
5495   // Perform element-by-element initialization.
5496   QualType ElementTy;
5497   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
5498   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
5499 
5500   // Drill down to the base element type on both arrays.
5501   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
5502   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
5503 
5504   llvm::Value *RHSBegin = RHSAddr.getPointer();
5505   llvm::Value *LHSBegin = LHSAddr.getPointer();
5506   // Cast from pointer to array type to pointer to single element.
5507   llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
5508   // The basic structure here is a while-do loop.
5509   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
5510   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
5511   llvm::Value *IsEmpty =
5512       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
5513   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5514 
5515   // Enter the loop body, making that address the current address.
5516   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5517   CGF.EmitBlock(BodyBB);
5518 
5519   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
5520 
5521   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
5522       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
5523   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
5524   Address RHSElementCurrent =
5525       Address(RHSElementPHI,
5526               RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5527 
5528   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
5529       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
5530   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
5531   Address LHSElementCurrent =
5532       Address(LHSElementPHI,
5533               LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5534 
5535   // Emit copy.
5536   CodeGenFunction::OMPPrivateScope Scope(CGF);
5537   Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
5538   Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
5539   Scope.Privatize();
5540   RedOpGen(CGF, XExpr, EExpr, UpExpr);
5541   Scope.ForceCleanup();
5542 
5543   // Shift the address forward by one element.
5544   llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
5545       LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
5546   llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
5547       RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
5548   // Check whether we've reached the end.
5549   llvm::Value *Done =
5550       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
5551   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
5552   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
5553   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
5554 
5555   // Done.
5556   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5557 }
5558 
5559 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5560 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5561 /// UDR combiner function.
5562 static void emitReductionCombiner(CodeGenFunction &CGF,
5563                                   const Expr *ReductionOp) {
5564   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5565     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5566       if (const auto *DRE =
5567               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5568         if (const auto *DRD =
5569                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5570           std::pair<llvm::Function *, llvm::Function *> Reduction =
5571               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5572           RValue Func = RValue::get(Reduction.first);
5573           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5574           CGF.EmitIgnoredExpr(ReductionOp);
5575           return;
5576         }
5577   CGF.EmitIgnoredExpr(ReductionOp);
5578 }
5579 
5580 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
5581     SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
5582     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
5583     ArrayRef<const Expr *> ReductionOps) {
5584   ASTContext &C = CGM.getContext();
5585 
5586   // void reduction_func(void *LHSArg, void *RHSArg);
5587   FunctionArgList Args;
5588   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5589                            ImplicitParamDecl::Other);
5590   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5591                            ImplicitParamDecl::Other);
5592   Args.push_back(&LHSArg);
5593   Args.push_back(&RHSArg);
5594   const auto &CGFI =
5595       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5596   std::string Name = getName({"omp", "reduction", "reduction_func"});
5597   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
5598                                     llvm::GlobalValue::InternalLinkage, Name,
5599                                     &CGM.getModule());
5600   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
5601   Fn->setDoesNotRecurse();
5602   CodeGenFunction CGF(CGM);
5603   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5604 
5605   // Dst = (void*[n])(LHSArg);
5606   // Src = (void*[n])(RHSArg);
5607   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5608       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
5609       ArgsType), CGF.getPointerAlign());
5610   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5611       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
5612       ArgsType), CGF.getPointerAlign());
5613 
5614   //  ...
5615   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5616   //  ...
5617   CodeGenFunction::OMPPrivateScope Scope(CGF);
5618   auto IPriv = Privates.begin();
5619   unsigned Idx = 0;
5620   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5621     const auto *RHSVar =
5622         cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5623     Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
5624       return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
5625     });
5626     const auto *LHSVar =
5627         cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5628     Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
5629       return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
5630     });
5631     QualType PrivTy = (*IPriv)->getType();
5632     if (PrivTy->isVariablyModifiedType()) {
5633       // Get array size and emit VLA type.
5634       ++Idx;
5635       Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5636       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5637       const VariableArrayType *VLA =
5638           CGF.getContext().getAsVariableArrayType(PrivTy);
5639       const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5640       CodeGenFunction::OpaqueValueMapping OpaqueMap(
5641           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5642       CGF.EmitVariablyModifiedType(PrivTy);
5643     }
5644   }
5645   Scope.Privatize();
5646   IPriv = Privates.begin();
5647   auto ILHS = LHSExprs.begin();
5648   auto IRHS = RHSExprs.begin();
5649   for (const Expr *E : ReductionOps) {
5650     if ((*IPriv)->getType()->isArrayType()) {
5651       // Emit reduction for array section.
5652       const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5653       const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5654       EmitOMPAggregateReduction(
5655           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5656           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5657             emitReductionCombiner(CGF, E);
5658           });
5659     } else {
5660       // Emit reduction for array subscript or single variable.
5661       emitReductionCombiner(CGF, E);
5662     }
5663     ++IPriv;
5664     ++ILHS;
5665     ++IRHS;
5666   }
5667   Scope.ForceCleanup();
5668   CGF.FinishFunction();
5669   return Fn;
5670 }
5671 
5672 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5673                                                   const Expr *ReductionOp,
5674                                                   const Expr *PrivateRef,
5675                                                   const DeclRefExpr *LHS,
5676                                                   const DeclRefExpr *RHS) {
5677   if (PrivateRef->getType()->isArrayType()) {
5678     // Emit reduction for array section.
5679     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5680     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5681     EmitOMPAggregateReduction(
5682         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5683         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5684           emitReductionCombiner(CGF, ReductionOp);
5685         });
5686   } else {
5687     // Emit reduction for array subscript or single variable.
5688     emitReductionCombiner(CGF, ReductionOp);
5689   }
5690 }
5691 
// Emits the final combination step of a reduction.
//
// Privates, LHSExprs, RHSExprs and ReductionOps are walked in lockstep: entry
// i of each list describes reduction item i. Options.SimpleReduction selects
// the combiner-only form; Options.WithNowait selects the *_nowait runtime
// entry points. Nothing is emitted when CGF has no insertion point.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // Only the combiners are emitted; no runtime call, no switch.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  // The list has one slot per RHS expression plus one extra slot for every
  // variably modified private, which holds that private's element count.
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  // Idx is the slot in RedList; it advances faster than I when size slots are
  // inserted.
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    // RedList[Idx] = (void *)&<RHSExprs>[I];
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      // The element count is converted to an integer of SizeTy and stored in
      // the next slot as a pointer-typed value (inttoptr).
      // NOTE: this local 'Size' shadows the slot counter 'Size' declared
      // above.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  // Note that <n> is RHSExprs.size(); the extra size slots are covered only by
  // sizeof(RedList).
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
                                       : OMPRTL__kmpc_reduce),
      Args);

  // 5. Build switch(res)
  // Both case blocks below end with a branch to DefaultBB, which is emitted
  // last and serves as the join point.
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  // Region body for case 1: emit the plain combiner of every reduction item.
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  // The action is given no enter callee (nullptr) and the matching
  // __kmpc_end_reduce{_nowait} entry as its exit callee.
  CommonActionTy Action(
      nullptr, llvm::None,
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
                                       : OMPRTL__kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  // Region body for case 2: each reduction op is emitted either as a simple
  // atomic update (when it has the form 'x = <update>') or inside a critical
  // region.
  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      // Decompose the op: XExpr is the assigned-to expression, UpExpr the
      // whole update expression, EExpr the right operand of the update's
      // binary operator and BO that operator's kind.
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // NOTE: inside this 'if' the name BO refers to the BinaryOperator
      // pointer, shadowing the BinaryOperatorKind declared just above.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        // Emits one atomic update of X. The callback handed to
        // EmitOMPAtomicSimpleUpdateExpr stores the value it receives into a
        // temporary, remaps VD to that temporary and evaluates UpExpr.
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        // The op is not an assignment, so the whole combiner is wrapped in a
        // critical region whose name is built from "atomic_reduction".
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Array section: one critical region per element.
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    // NOTE: this EndArgs shadows the identically initialized array declared
    // for case 1.
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          createRuntimeFunction(OMPRTL__kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    // With nowait no end call is attached to the atomic case.
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5995 
5996 /// Generates unique name for artificial threadprivate variables.
5997 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5998 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5999                                       const Expr *Ref) {
6000   SmallString<256> Buffer;
6001   llvm::raw_svector_ostream Out(Buffer);
6002   const clang::DeclRefExpr *DE;
6003   const VarDecl *D = ::getBaseDecl(Ref, DE);
6004   if (!D)
6005     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
6006   D = D->getCanonicalDecl();
6007   std::string Name = CGM.getOpenMPRuntime().getName(
6008       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
6009   Out << Prefix << Name << "_"
6010       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
6011   return Out.str();
6012 }
6013 
6014 /// Emits reduction initializer function:
6015 /// \code
6016 /// void @.red_init(void* %arg) {
6017 /// %0 = bitcast void* %arg to <type>*
6018 /// store <type> <init>, <type>* %0
6019 /// ret void
6020 /// }
6021 /// \endcode
6022 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
6023                                            SourceLocation Loc,
6024                                            ReductionCodeGen &RCG, unsigned N) {
6025   ASTContext &C = CGM.getContext();
6026   FunctionArgList Args;
6027   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6028                           ImplicitParamDecl::Other);
6029   Args.emplace_back(&Param);
6030   const auto &FnInfo =
6031       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6032   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6033   std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
6034   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6035                                     Name, &CGM.getModule());
6036   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6037   Fn->setDoesNotRecurse();
6038   CodeGenFunction CGF(CGM);
6039   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6040   Address PrivateAddr = CGF.EmitLoadOfPointer(
6041       CGF.GetAddrOfLocalVar(&Param),
6042       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6043   llvm::Value *Size = nullptr;
6044   // If the size of the reduction item is non-constant, load it from global
6045   // threadprivate variable.
6046   if (RCG.getSizes(N).second) {
6047     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6048         CGF, CGM.getContext().getSizeType(),
6049         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6050     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6051                                 CGM.getContext().getSizeType(), Loc);
6052   }
6053   RCG.emitAggregateType(CGF, N, Size);
6054   LValue SharedLVal;
6055   // If initializer uses initializer from declare reduction construct, emit a
6056   // pointer to the address of the original reduction item (reuired by reduction
6057   // initializer)
6058   if (RCG.usesReductionInitializer(N)) {
6059     Address SharedAddr =
6060         CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6061             CGF, CGM.getContext().VoidPtrTy,
6062             generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
6063     SharedAddr = CGF.EmitLoadOfPointer(
6064         SharedAddr,
6065         CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
6066     SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
6067   } else {
6068     SharedLVal = CGF.MakeNaturalAlignAddrLValue(
6069         llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
6070         CGM.getContext().VoidPtrTy);
6071   }
6072   // Emit the initializer:
6073   // %0 = bitcast void* %arg to <type>*
6074   // store <type> <init>, <type>* %0
6075   RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
6076                          [](CodeGenFunction &) { return false; });
6077   CGF.FinishFunction();
6078   return Fn;
6079 }
6080 
6081 /// Emits reduction combiner function:
6082 /// \code
6083 /// void @.red_comb(void* %arg0, void* %arg1) {
6084 /// %lhs = bitcast void* %arg0 to <type>*
6085 /// %rhs = bitcast void* %arg1 to <type>*
6086 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
6087 /// store <type> %2, <type>* %lhs
6088 /// ret void
6089 /// }
6090 /// \endcode
6091 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
6092                                            SourceLocation Loc,
6093                                            ReductionCodeGen &RCG, unsigned N,
6094                                            const Expr *ReductionOp,
6095                                            const Expr *LHS, const Expr *RHS,
6096                                            const Expr *PrivateRef) {
6097   ASTContext &C = CGM.getContext();
6098   const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
6099   const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
6100   FunctionArgList Args;
6101   ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
6102                                C.VoidPtrTy, ImplicitParamDecl::Other);
6103   ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6104                             ImplicitParamDecl::Other);
6105   Args.emplace_back(&ParamInOut);
6106   Args.emplace_back(&ParamIn);
6107   const auto &FnInfo =
6108       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6109   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6110   std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
6111   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6112                                     Name, &CGM.getModule());
6113   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6114   Fn->setDoesNotRecurse();
6115   CodeGenFunction CGF(CGM);
6116   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6117   llvm::Value *Size = nullptr;
6118   // If the size of the reduction item is non-constant, load it from global
6119   // threadprivate variable.
6120   if (RCG.getSizes(N).second) {
6121     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6122         CGF, CGM.getContext().getSizeType(),
6123         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6124     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6125                                 CGM.getContext().getSizeType(), Loc);
6126   }
6127   RCG.emitAggregateType(CGF, N, Size);
6128   // Remap lhs and rhs variables to the addresses of the function arguments.
6129   // %lhs = bitcast void* %arg0 to <type>*
6130   // %rhs = bitcast void* %arg1 to <type>*
6131   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6132   PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
6133     // Pull out the pointer to the variable.
6134     Address PtrAddr = CGF.EmitLoadOfPointer(
6135         CGF.GetAddrOfLocalVar(&ParamInOut),
6136         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6137     return CGF.Builder.CreateElementBitCast(
6138         PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
6139   });
6140   PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
6141     // Pull out the pointer to the variable.
6142     Address PtrAddr = CGF.EmitLoadOfPointer(
6143         CGF.GetAddrOfLocalVar(&ParamIn),
6144         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6145     return CGF.Builder.CreateElementBitCast(
6146         PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
6147   });
6148   PrivateScope.Privatize();
6149   // Emit the combiner body:
6150   // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
6151   // store <type> %2, <type>* %lhs
6152   CGM.getOpenMPRuntime().emitSingleReductionCombiner(
6153       CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
6154       cast<DeclRefExpr>(RHS));
6155   CGF.FinishFunction();
6156   return Fn;
6157 }
6158 
6159 /// Emits reduction finalizer function:
6160 /// \code
6161 /// void @.red_fini(void* %arg) {
6162 /// %0 = bitcast void* %arg to <type>*
6163 /// <destroy>(<type>* %0)
6164 /// ret void
6165 /// }
6166 /// \endcode
6167 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
6168                                            SourceLocation Loc,
6169                                            ReductionCodeGen &RCG, unsigned N) {
6170   if (!RCG.needCleanups(N))
6171     return nullptr;
6172   ASTContext &C = CGM.getContext();
6173   FunctionArgList Args;
6174   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6175                           ImplicitParamDecl::Other);
6176   Args.emplace_back(&Param);
6177   const auto &FnInfo =
6178       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6179   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6180   std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
6181   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6182                                     Name, &CGM.getModule());
6183   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6184   Fn->setDoesNotRecurse();
6185   CodeGenFunction CGF(CGM);
6186   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6187   Address PrivateAddr = CGF.EmitLoadOfPointer(
6188       CGF.GetAddrOfLocalVar(&Param),
6189       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6190   llvm::Value *Size = nullptr;
6191   // If the size of the reduction item is non-constant, load it from global
6192   // threadprivate variable.
6193   if (RCG.getSizes(N).second) {
6194     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6195         CGF, CGM.getContext().getSizeType(),
6196         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6197     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6198                                 CGM.getContext().getSizeType(), Loc);
6199   }
6200   RCG.emitAggregateType(CGF, N, Size);
6201   // Emit the finalizer body:
6202   // <destroy>(<type>* %0)
6203   RCG.emitCleanups(CGF, N, PrivateAddr);
6204   CGF.FinishFunction();
6205   return Fn;
6206 }
6207 
/// Emits the array of kmp_task_red_input_t descriptors, one per entry of
/// Data.ReductionVars, and the call to __kmpc_task_reduction_init that
/// receives this array.
/// \p LHSExprs and \p RHSExprs are indexed with the same counter as
/// Data.ReductionVars, Data.ReductionCopies and Data.ReductionOps; they are
/// expected to have at least as many elements (this is not checked here).
/// \returns the result of the runtime call, or nullptr when there is no
/// insertion point or no reduction variables.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  // Nothing to emit without an insertion point or without reduction items.
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_task_red_input {
  //   void *reduce_shar; // shared reduction item
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_task_red_input_t;
  // The fields are added in exactly this order; the flags field is emitted as
  // an unsigned 32-bit integer.
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  // One descriptor per reduction variable.
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
                       Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer());
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // The sizes queried below are available only after the aggregate type of
    // the item has been emitted.
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs, array sections and
    // custom reduction initializations. It is required because runtime does not
    // provide the way to pass the sizes of VLAs/array sections to
    // initializer/combiner/finalizer functions and does not pass the pointer to
    // original reduction item to the initializer. Instead threadprivate global
    // variables are used to store these values and use them in the functions.
    // A non-null SizeVal is what marks the item as needing delayed creation.
    bool DelayedCreation = !!SizeVal;
    // ElemLVal.reduce_size = size (in chars, converted to size_t);
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // A custom reduction initializer also requires delayed creation.
    DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
    // ElemLVal.reduce_fini = fini; (null when the item needs no finalizer)
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = DelayedCreation ? 1 : 0;
    // NOTE(review): the value 1 presumably selects lazy creation of the
    // private copy in the runtime - confirm against the runtime headers.
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*IsSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
  }
  // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
}
6312 
6313 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6314                                               SourceLocation Loc,
6315                                               ReductionCodeGen &RCG,
6316                                               unsigned N) {
6317   auto Sizes = RCG.getSizes(N);
6318   // Emit threadprivate global variable if the type is non-constant
6319   // (Sizes.second = nullptr).
6320   if (Sizes.second) {
6321     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6322                                                      /*isSigned=*/false);
6323     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6324         CGF, CGM.getContext().getSizeType(),
6325         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6326     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6327   }
6328   // Store address of the original reduction item if custom initializer is used.
6329   if (RCG.usesReductionInitializer(N)) {
6330     Address SharedAddr = getAddrOfArtificialThreadPrivate(
6331         CGF, CGM.getContext().VoidPtrTy,
6332         generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
6333     CGF.Builder.CreateStore(
6334         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6335             RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy),
6336         SharedAddr, /*IsVolatile=*/false);
6337   }
6338 }
6339 
6340 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6341                                               SourceLocation Loc,
6342                                               llvm::Value *ReductionsPtr,
6343                                               LValue SharedLVal) {
6344   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6345   // *d);
6346   llvm::Value *Args[] = {
6347       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6348                                 /*isSigned=*/true),
6349       ReductionsPtr,
6350       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(),
6351                                                       CGM.VoidPtrTy)};
6352   return Address(
6353       CGF.EmitRuntimeCall(
6354           createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
6355       SharedLVal.getAlignment());
6356 }
6357 
6358 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6359                                        SourceLocation Loc) {
6360   if (!CGF.HaveInsertPoint())
6361     return;
6362   // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6363   // global_tid);
6364   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6365   // Ignore return result until untied tasks are supported.
6366   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
6367   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6368     Region->emitUntiedSwitch(CGF);
6369 }
6370 
6371 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6372                                            OpenMPDirectiveKind InnerKind,
6373                                            const RegionCodeGenTy &CodeGen,
6374                                            bool HasCancel) {
6375   if (!CGF.HaveInsertPoint())
6376     return;
6377   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
6378   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6379 }
6380 
namespace {
/// Cancellation kinds. The numeric value is what ends up as the 'cncl_kind'
/// argument of the __kmpc_cancel and __kmpc_cancellationpoint calls emitted in
/// this file, so the explicit values must not be changed independently of the
/// runtime.
enum RTCancelKind {
  /// No construct selected (presumably "no request" - confirm against the
  /// runtime).
  CancelNoreq = 0,
  /// Used for the 'parallel' construct.
  CancelParallel = 1,
  /// Used for the 'for' construct.
  CancelLoop = 2,
  /// Used for the 'sections' construct.
  CancelSections = 3,
  /// Used for the 'taskgroup' construct.
  CancelTaskgroup = 4
};
} // anonymous namespace
6390 
6391 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6392   RTCancelKind CancelKind = CancelNoreq;
6393   if (CancelRegion == OMPD_parallel)
6394     CancelKind = CancelParallel;
6395   else if (CancelRegion == OMPD_for)
6396     CancelKind = CancelLoop;
6397   else if (CancelRegion == OMPD_sections)
6398     CancelKind = CancelSections;
6399   else {
6400     assert(CancelRegion == OMPD_taskgroup);
6401     CancelKind = CancelTaskgroup;
6402   }
6403   return CancelKind;
6404 }
6405 
6406 void CGOpenMPRuntime::emitCancellationPointCall(
6407     CodeGenFunction &CGF, SourceLocation Loc,
6408     OpenMPDirectiveKind CancelRegion) {
6409   if (!CGF.HaveInsertPoint())
6410     return;
6411   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6412   // global_tid, kmp_int32 cncl_kind);
6413   if (auto *OMPRegionInfo =
6414           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6415     // For 'cancellation point taskgroup', the task region info may not have a
6416     // cancel. This may instead happen in another adjacent task.
6417     if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6418       llvm::Value *Args[] = {
6419           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6420           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6421       // Ignore return result until untied tasks are supported.
6422       llvm::Value *Result = CGF.EmitRuntimeCall(
6423           createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
6424       // if (__kmpc_cancellationpoint()) {
6425       //   exit from construct;
6426       // }
6427       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6428       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6429       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6430       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6431       CGF.EmitBlock(ExitBB);
6432       // exit from construct;
6433       CodeGenFunction::JumpDest CancelDest =
6434           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6435       CGF.EmitBranchThroughCleanup(CancelDest);
6436       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6437     }
6438   }
6439 }
6440 
6441 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6442                                      const Expr *IfCond,
6443                                      OpenMPDirectiveKind CancelRegion) {
6444   if (!CGF.HaveInsertPoint())
6445     return;
6446   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6447   // kmp_int32 cncl_kind);
6448   if (auto *OMPRegionInfo =
6449           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6450     auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
6451                                                         PrePostActionTy &) {
6452       CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6453       llvm::Value *Args[] = {
6454           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6455           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6456       // Ignore return result until untied tasks are supported.
6457       llvm::Value *Result = CGF.EmitRuntimeCall(
6458           RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
6459       // if (__kmpc_cancel()) {
6460       //   exit from construct;
6461       // }
6462       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6463       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6464       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6465       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6466       CGF.EmitBlock(ExitBB);
6467       // exit from construct;
6468       CodeGenFunction::JumpDest CancelDest =
6469           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6470       CGF.EmitBranchThroughCleanup(CancelDest);
6471       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6472     };
6473     if (IfCond) {
6474       emitOMPIfClause(CGF, IfCond, ThenGen,
6475                       [](CodeGenFunction &, PrePostActionTy &) {});
6476     } else {
6477       RegionCodeGenTy ThenRCG(ThenGen);
6478       ThenRCG(CGF);
6479     }
6480   }
6481 }
6482 
/// Emits the outlined function for the target directive \p D.
/// Requires a non-empty \p ParentName, records in HasEmittedTargetRegion that
/// a target region has been emitted, and forwards all arguments unchanged to
/// emitTargetOutlinedFunctionHelper, which fills in \p OutlinedFn and (for
/// offload entries) \p OutlinedFnID.
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  // The flag is set before the helper runs.
  HasEmittedTargetRegion = true;
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}
6492 
6493 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6494     const OMPExecutableDirective &D, StringRef ParentName,
6495     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6496     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6497   // Create a unique name for the entry function using the source location
6498   // information of the current target region. The name will be something like:
6499   //
6500   // __omp_offloading_DD_FFFF_PP_lBB
6501   //
6502   // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
6503   // mangled name of the function that encloses the target region and BB is the
6504   // line number of the target region.
6505 
6506   unsigned DeviceID;
6507   unsigned FileID;
6508   unsigned Line;
6509   getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
6510                            Line);
6511   SmallString<64> EntryFnName;
6512   {
6513     llvm::raw_svector_ostream OS(EntryFnName);
6514     OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
6515        << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
6516   }
6517 
6518   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6519 
6520   CodeGenFunction CGF(CGM, true);
6521   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6522   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6523 
6524   OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);
6525 
6526   // If this target outline function is not an offload entry, we don't need to
6527   // register it.
6528   if (!IsOffloadEntry)
6529     return;
6530 
6531   // The target region ID is used by the runtime library to identify the current
6532   // target region, so it only has to be unique and not necessarily point to
6533   // anything. It could be the pointer to the outlined function that implements
6534   // the target region, but we aren't using that so that the compiler doesn't
6535   // need to keep that, and could therefore inline the host function if proven
6536   // worthwhile during optimization. In the other hand, if emitting code for the
6537   // device, the ID has to be the function address so that it can retrieved from
6538   // the offloading entry and launched by the runtime library. We also mark the
6539   // outlined function to have external linkage in case we are emitting code for
6540   // the device, because these functions will be entry points to the device.
6541 
6542   if (CGM.getLangOpts().OpenMPIsDevice) {
6543     OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
6544     OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
6545     OutlinedFn->setDSOLocal(false);
6546   } else {
6547     std::string Name = getName({EntryFnName, "region_id"});
6548     OutlinedFnID = new llvm::GlobalVariable(
6549         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
6550         llvm::GlobalValue::WeakAnyLinkage,
6551         llvm::Constant::getNullValue(CGM.Int8Ty), Name);
6552   }
6553 
6554   // Register the information for the entry associated with this target region.
6555   OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
6556       DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
6557       OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
6558 }
6559 
6560 /// Checks if the expression is constant or does not have non-trivial function
6561 /// calls.
6562 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6563   // We can skip constant expressions.
6564   // We can skip expressions with trivial calls or simple expressions.
6565   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6566           !E->hasNonTrivialCall(Ctx)) &&
6567          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6568 }
6569 
6570 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6571                                                     const Stmt *Body) {
6572   const Stmt *Child = Body->IgnoreContainers();
6573   while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6574     Child = nullptr;
6575     for (const Stmt *S : C->body()) {
6576       if (const auto *E = dyn_cast<Expr>(S)) {
6577         if (isTrivial(Ctx, E))
6578           continue;
6579       }
6580       // Some of the statements can be ignored.
6581       if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6582           isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6583         continue;
6584       // Analyze declarations.
6585       if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6586         if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
6587               if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6588                   isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6589                   isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6590                   isa<UsingDirectiveDecl>(D) ||
6591                   isa<OMPDeclareReductionDecl>(D) ||
6592                   isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6593                 return true;
6594               const auto *VD = dyn_cast<VarDecl>(D);
6595               if (!VD)
6596                 return false;
6597               return VD->isConstexpr() ||
6598                      ((VD->getType().isTrivialType(Ctx) ||
6599                        VD->getType()->isReferenceType()) &&
6600                       (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
6601             }))
6602           continue;
6603       }
6604       // Found multiple children - cannot get the one child only.
6605       if (Child)
6606         return nullptr;
6607       Child = S;
6608     }
6609     if (Child)
6610       Child = Child->IgnoreContainers();
6611   }
6612   return Child;
6613 }
6614 
6615 /// Emit the number of teams for a target directive.  Inspect the num_teams
6616 /// clause associated with a teams construct combined or closely nested
6617 /// with the target directive.
6618 ///
6619 /// Emit a team of size one for directives such as 'target parallel' that
6620 /// have no associated teams construct.
6621 ///
6622 /// Otherwise, return nullptr.
6623 static llvm::Value *
6624 emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
6625                                const OMPExecutableDirective &D) {
6626   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6627          "Clauses associated with the teams directive expected to be emitted "
6628          "only for the host!");
6629   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6630   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6631          "Expected target-based executable directive.");
6632   CGBuilderTy &Bld = CGF.Builder;
6633   switch (DirectiveKind) {
6634   case OMPD_target: {
6635     const auto *CS = D.getInnermostCapturedStmt();
6636     const auto *Body =
6637         CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6638     const Stmt *ChildStmt =
6639         CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6640     if (const auto *NestedDir =
6641             dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6642       if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6643         if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6644           CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6645           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6646           const Expr *NumTeams =
6647               NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6648           llvm::Value *NumTeamsVal =
6649               CGF.EmitScalarExpr(NumTeams,
6650                                  /*IgnoreResultAssign*/ true);
6651           return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6652                                    /*IsSigned=*/true);
6653         }
6654         return Bld.getInt32(0);
6655       }
6656       if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6657           isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
6658         return Bld.getInt32(1);
6659       return Bld.getInt32(0);
6660     }
6661     return nullptr;
6662   }
6663   case OMPD_target_teams:
6664   case OMPD_target_teams_distribute:
6665   case OMPD_target_teams_distribute_simd:
6666   case OMPD_target_teams_distribute_parallel_for:
6667   case OMPD_target_teams_distribute_parallel_for_simd: {
6668     if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6669       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6670       const Expr *NumTeams =
6671           D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6672       llvm::Value *NumTeamsVal =
6673           CGF.EmitScalarExpr(NumTeams,
6674                              /*IgnoreResultAssign*/ true);
6675       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6676                                /*IsSigned=*/true);
6677     }
6678     return Bld.getInt32(0);
6679   }
6680   case OMPD_target_parallel:
6681   case OMPD_target_parallel_for:
6682   case OMPD_target_parallel_for_simd:
6683   case OMPD_target_simd:
6684     return Bld.getInt32(1);
6685   case OMPD_parallel:
6686   case OMPD_for:
6687   case OMPD_parallel_for:
6688   case OMPD_parallel_sections:
6689   case OMPD_for_simd:
6690   case OMPD_parallel_for_simd:
6691   case OMPD_cancel:
6692   case OMPD_cancellation_point:
6693   case OMPD_ordered:
6694   case OMPD_threadprivate:
6695   case OMPD_allocate:
6696   case OMPD_task:
6697   case OMPD_simd:
6698   case OMPD_sections:
6699   case OMPD_section:
6700   case OMPD_single:
6701   case OMPD_master:
6702   case OMPD_critical:
6703   case OMPD_taskyield:
6704   case OMPD_barrier:
6705   case OMPD_taskwait:
6706   case OMPD_taskgroup:
6707   case OMPD_atomic:
6708   case OMPD_flush:
6709   case OMPD_teams:
6710   case OMPD_target_data:
6711   case OMPD_target_exit_data:
6712   case OMPD_target_enter_data:
6713   case OMPD_distribute:
6714   case OMPD_distribute_simd:
6715   case OMPD_distribute_parallel_for:
6716   case OMPD_distribute_parallel_for_simd:
6717   case OMPD_teams_distribute:
6718   case OMPD_teams_distribute_simd:
6719   case OMPD_teams_distribute_parallel_for:
6720   case OMPD_teams_distribute_parallel_for_simd:
6721   case OMPD_target_update:
6722   case OMPD_declare_simd:
6723   case OMPD_declare_target:
6724   case OMPD_end_declare_target:
6725   case OMPD_declare_reduction:
6726   case OMPD_declare_mapper:
6727   case OMPD_taskloop:
6728   case OMPD_taskloop_simd:
6729   case OMPD_requires:
6730   case OMPD_unknown:
6731     break;
6732   }
6733   llvm_unreachable("Unexpected directive kind.");
6734 }
6735 
6736 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6737                                   llvm::Value *DefaultThreadLimitVal) {
6738   const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6739       CGF.getContext(), CS->getCapturedStmt());
6740   if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6741     if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6742       llvm::Value *NumThreads = nullptr;
6743       llvm::Value *CondVal = nullptr;
6744       // Handle if clause. If if clause present, the number of threads is
6745       // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6746       if (Dir->hasClausesOfKind<OMPIfClause>()) {
6747         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6748         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6749         const OMPIfClause *IfClause = nullptr;
6750         for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6751           if (C->getNameModifier() == OMPD_unknown ||
6752               C->getNameModifier() == OMPD_parallel) {
6753             IfClause = C;
6754             break;
6755           }
6756         }
6757         if (IfClause) {
6758           const Expr *Cond = IfClause->getCondition();
6759           bool Result;
6760           if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6761             if (!Result)
6762               return CGF.Builder.getInt32(1);
6763           } else {
6764             CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6765             if (const auto *PreInit =
6766                     cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6767               for (const auto *I : PreInit->decls()) {
6768                 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6769                   CGF.EmitVarDecl(cast<VarDecl>(*I));
6770                 } else {
6771                   CodeGenFunction::AutoVarEmission Emission =
6772                       CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6773                   CGF.EmitAutoVarCleanups(Emission);
6774                 }
6775               }
6776             }
6777             CondVal = CGF.EvaluateExprAsBool(Cond);
6778           }
6779         }
6780       }
6781       // Check the value of num_threads clause iff if clause was not specified
6782       // or is not evaluated to false.
6783       if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6784         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6785         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6786         const auto *NumThreadsClause =
6787             Dir->getSingleClause<OMPNumThreadsClause>();
6788         CodeGenFunction::LexicalScope Scope(
6789             CGF, NumThreadsClause->getNumThreads()->getSourceRange());
6790         if (const auto *PreInit =
6791                 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6792           for (const auto *I : PreInit->decls()) {
6793             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6794               CGF.EmitVarDecl(cast<VarDecl>(*I));
6795             } else {
6796               CodeGenFunction::AutoVarEmission Emission =
6797                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6798               CGF.EmitAutoVarCleanups(Emission);
6799             }
6800           }
6801         }
6802         NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
6803         NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
6804                                                /*IsSigned=*/false);
6805         if (DefaultThreadLimitVal)
6806           NumThreads = CGF.Builder.CreateSelect(
6807               CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
6808               DefaultThreadLimitVal, NumThreads);
6809       } else {
6810         NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
6811                                            : CGF.Builder.getInt32(0);
6812       }
6813       // Process condition of the if clause.
6814       if (CondVal) {
6815         NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
6816                                               CGF.Builder.getInt32(1));
6817       }
6818       return NumThreads;
6819     }
6820     if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6821       return CGF.Builder.getInt32(1);
6822     return DefaultThreadLimitVal;
6823   }
6824   return DefaultThreadLimitVal ? DefaultThreadLimitVal
6825                                : CGF.Builder.getInt32(0);
6826 }
6827 
6828 /// Emit the number of threads for a target directive.  Inspect the
6829 /// thread_limit clause associated with a teams construct combined or closely
6830 /// nested with the target directive.
6831 ///
6832 /// Emit the num_threads clause for directives such as 'target parallel' that
6833 /// have no associated teams construct.
6834 ///
6835 /// Otherwise, return nullptr.
6836 static llvm::Value *
6837 emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
6838                                  const OMPExecutableDirective &D) {
6839   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6840          "Clauses associated with the teams directive expected to be emitted "
6841          "only for the host!");
6842   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6843   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6844          "Expected target-based executable directive.");
6845   CGBuilderTy &Bld = CGF.Builder;
6846   llvm::Value *ThreadLimitVal = nullptr;
6847   llvm::Value *NumThreadsVal = nullptr;
6848   switch (DirectiveKind) {
6849   case OMPD_target: {
6850     const CapturedStmt *CS = D.getInnermostCapturedStmt();
6851     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6852       return NumThreads;
6853     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6854         CGF.getContext(), CS->getCapturedStmt());
6855     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6856       if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
6857         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6858         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6859         const auto *ThreadLimitClause =
6860             Dir->getSingleClause<OMPThreadLimitClause>();
6861         CodeGenFunction::LexicalScope Scope(
6862             CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
6863         if (const auto *PreInit =
6864                 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6865           for (const auto *I : PreInit->decls()) {
6866             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6867               CGF.EmitVarDecl(cast<VarDecl>(*I));
6868             } else {
6869               CodeGenFunction::AutoVarEmission Emission =
6870                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6871               CGF.EmitAutoVarCleanups(Emission);
6872             }
6873           }
6874         }
6875         llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6876             ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6877         ThreadLimitVal =
6878             Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false);
6879       }
6880       if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6881           !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6882         CS = Dir->getInnermostCapturedStmt();
6883         const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6884             CGF.getContext(), CS->getCapturedStmt());
6885         Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6886       }
6887       if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
6888           !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
6889         CS = Dir->getInnermostCapturedStmt();
6890         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6891           return NumThreads;
6892       }
6893       if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6894         return Bld.getInt32(1);
6895     }
6896     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6897   }
6898   case OMPD_target_teams: {
6899     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6900       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6901       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6902       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6903           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6904       ThreadLimitVal =
6905           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false);
6906     }
6907     const CapturedStmt *CS = D.getInnermostCapturedStmt();
6908     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6909       return NumThreads;
6910     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6911         CGF.getContext(), CS->getCapturedStmt());
6912     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6913       if (Dir->getDirectiveKind() == OMPD_distribute) {
6914         CS = Dir->getInnermostCapturedStmt();
6915         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6916           return NumThreads;
6917       }
6918     }
6919     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6920   }
6921   case OMPD_target_teams_distribute:
6922     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6923       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6924       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6925       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6926           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6927       ThreadLimitVal =
6928           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false);
6929     }
6930     return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
6931   case OMPD_target_parallel:
6932   case OMPD_target_parallel_for:
6933   case OMPD_target_parallel_for_simd:
6934   case OMPD_target_teams_distribute_parallel_for:
6935   case OMPD_target_teams_distribute_parallel_for_simd: {
6936     llvm::Value *CondVal = nullptr;
6937     // Handle if clause. If if clause present, the number of threads is
6938     // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6939     if (D.hasClausesOfKind<OMPIfClause>()) {
6940       const OMPIfClause *IfClause = nullptr;
6941       for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6942         if (C->getNameModifier() == OMPD_unknown ||
6943             C->getNameModifier() == OMPD_parallel) {
6944           IfClause = C;
6945           break;
6946         }
6947       }
6948       if (IfClause) {
6949         const Expr *Cond = IfClause->getCondition();
6950         bool Result;
6951         if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6952           if (!Result)
6953             return Bld.getInt32(1);
6954         } else {
6955           CodeGenFunction::RunCleanupsScope Scope(CGF);
6956           CondVal = CGF.EvaluateExprAsBool(Cond);
6957         }
6958       }
6959     }
6960     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6961       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6962       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6963       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6964           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6965       ThreadLimitVal =
6966           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false);
6967     }
6968     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6969       CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6970       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6971       llvm::Value *NumThreads = CGF.EmitScalarExpr(
6972           NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
6973       NumThreadsVal =
6974           Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/false);
6975       ThreadLimitVal = ThreadLimitVal
6976                            ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
6977                                                                 ThreadLimitVal),
6978                                               NumThreadsVal, ThreadLimitVal)
6979                            : NumThreadsVal;
6980     }
6981     if (!ThreadLimitVal)
6982       ThreadLimitVal = Bld.getInt32(0);
6983     if (CondVal)
6984       return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
6985     return ThreadLimitVal;
6986   }
6987   case OMPD_target_teams_distribute_simd:
6988   case OMPD_target_simd:
6989     return Bld.getInt32(1);
6990   case OMPD_parallel:
6991   case OMPD_for:
6992   case OMPD_parallel_for:
6993   case OMPD_parallel_sections:
6994   case OMPD_for_simd:
6995   case OMPD_parallel_for_simd:
6996   case OMPD_cancel:
6997   case OMPD_cancellation_point:
6998   case OMPD_ordered:
6999   case OMPD_threadprivate:
7000   case OMPD_allocate:
7001   case OMPD_task:
7002   case OMPD_simd:
7003   case OMPD_sections:
7004   case OMPD_section:
7005   case OMPD_single:
7006   case OMPD_master:
7007   case OMPD_critical:
7008   case OMPD_taskyield:
7009   case OMPD_barrier:
7010   case OMPD_taskwait:
7011   case OMPD_taskgroup:
7012   case OMPD_atomic:
7013   case OMPD_flush:
7014   case OMPD_teams:
7015   case OMPD_target_data:
7016   case OMPD_target_exit_data:
7017   case OMPD_target_enter_data:
7018   case OMPD_distribute:
7019   case OMPD_distribute_simd:
7020   case OMPD_distribute_parallel_for:
7021   case OMPD_distribute_parallel_for_simd:
7022   case OMPD_teams_distribute:
7023   case OMPD_teams_distribute_simd:
7024   case OMPD_teams_distribute_parallel_for:
7025   case OMPD_teams_distribute_parallel_for_simd:
7026   case OMPD_target_update:
7027   case OMPD_declare_simd:
7028   case OMPD_declare_target:
7029   case OMPD_end_declare_target:
7030   case OMPD_declare_reduction:
7031   case OMPD_declare_mapper:
7032   case OMPD_taskloop:
7033   case OMPD_taskloop_simd:
7034   case OMPD_requires:
7035   case OMPD_unknown:
7036     break;
7037   }
7038   llvm_unreachable("Unsupported directive kind.");
7039 }
7040 
7041 namespace {
7042 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7043 
7044 // Utility to handle information from clauses associated with a given
7045 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7046 // It provides a convenient interface to obtain the information and generate
7047 // code for that information.
7048 class MappableExprsHandler {
7049 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading. Each flag below OMP_MAP_MEMBER_OF occupies a single distinct
  /// bit, so flags can be combined with bitwise OR; OMP_MAP_MEMBER_OF is a
  /// 16-bit field in the most significant bits.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have
    /// the use_device_ptr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// The 16 MSBs of the flags indicate whether the entry is a member of
    /// some struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    // Marks this enum as a bitmask enum so that the bitwise operators enabled
    // by LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE apply to it.
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7087 
  /// Class that associates information with a base pointer to be passed to the
  /// runtime library. It pairs the pointer value with an optional declaration
  /// and can be dereferenced with unary '*' to obtain the pointer value.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    /// Construct from a base pointer and an optional device pointer
    /// declaration. The constructor is intentionally not explicit, so a plain
    /// llvm::Value * converts implicitly to a BasePointerInfo.
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    /// Return the stored base pointer.
    llvm::Value *operator*() const { return Ptr; }
    /// Return the associated device pointer declaration, or null if none.
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    /// Replace the associated device pointer declaration with \p D.
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };
7104 
  /// Array of base pointers, each with its optional device pointer
  /// declaration.
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  /// Array of plain values; generateInfoForComponentList() uses it for both
  /// the section pointers and the sizes.
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  /// Array of map type flags.
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
7108 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  /// All addresses start out invalid and both field indices start at 0.
  struct StructRangeInfoTy {
    /// Field index and address of the lowest mapped element.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Field index and address of the highest mapped element.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// NOTE(review): presumably the address of the struct the elements belong
    /// to - confirm against the code that fills this in.
    Address Base = Address::invalid();
  };
7120 
7121 private:
  /// Information recorded for one mappable-expression component list: the
  /// components themselves, the map type and modifiers, and two boolean
  /// properties of the entry.
  struct MapInfo {
    /// The expression components of the entry.
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    /// The map type; OMPC_MAP_unknown until set.
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    /// The map-type modifiers. ArrayRef is a non-owning view, so the
    /// referenced modifiers must outlive this object.
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    /// NOTE(review): presumably requests that the runtime return the device
    /// pointer for this entry (see OMP_MAP_RETURN_PARAM) - confirm at the
    /// use sites.
    bool ReturnDevicePointer = false;
    /// Whether the entry is marked implicit.
    bool IsImplicit = false;

    MapInfo() = default;
    /// Initialize every field from the corresponding argument.
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
  };
7139 
  /// If use_device_ptr is used on a pointer which is a struct member and there
  /// is no map information about it, then emission of that entry is deferred
  /// until the whole struct has been processed. This record keeps what is
  /// needed to emit the entry later.
  struct DeferredDevicePtrEntryTy {
    /// NOTE(review): presumably the expression referring to the struct member
    /// pointer - confirm at the use sites.
    const Expr *IE = nullptr;
    /// NOTE(review): presumably the declaration named in the use_device_ptr
    /// clause - confirm at the use sites.
    const ValueDecl *VD = nullptr;

    /// Store the expression and declaration of the deferred entry.
    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
        : IE(IE), VD(VD) {}
  };
7150 
7151   /// Directive from where the map clauses were extracted.
7152   const OMPExecutableDirective &CurDir;
7153 
7154   /// Function the directive is being generated for.
7155   CodeGenFunction &CGF;
7156 
7157   /// Set of all first private variables in the current directive.
7158   /// bool data is set to true if the variable is implicitly marked as
7159   /// firstprivate, false otherwise.
7160   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
7161 
7162   /// Map between device pointer declarations and their expression components.
7163   /// The key value for declarations in 'this' is null.
7164   llvm::DenseMap<
7165       const ValueDecl *,
7166       SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7167       DevPointersMap;
7168 
7169   llvm::Value *getExprTypeSize(const Expr *E) const {
7170     QualType ExprTy = E->getType().getCanonicalType();
7171 
7172     // Reference types are ignored for mapping purposes.
7173     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7174       ExprTy = RefTy->getPointeeType().getCanonicalType();
7175 
7176     // Given that an array section is considered a built-in type, we need to
7177     // do the calculation based on the length of the section instead of relying
7178     // on CGF.getTypeSize(E->getType()).
7179     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7180       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7181                             OAE->getBase()->IgnoreParenImpCasts())
7182                             .getCanonicalType();
7183 
7184       // If there is no length associated with the expression, that means we
7185       // are using the whole length of the base.
7186       if (!OAE->getLength() && OAE->getColonLoc().isValid())
7187         return CGF.getTypeSize(BaseTy);
7188 
7189       llvm::Value *ElemSize;
7190       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7191         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7192       } else {
7193         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7194         assert(ATy && "Expecting array type if not a pointer type.");
7195         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7196       }
7197 
7198       // If we don't have a length at this point, that is because we have an
7199       // array section with a single element.
7200       if (!OAE->getLength())
7201         return ElemSize;
7202 
7203       llvm::Value *LengthVal = CGF.EmitScalarExpr(OAE->getLength());
7204       LengthVal =
7205           CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false);
7206       return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7207     }
7208     return CGF.getTypeSize(ExprTy);
7209   }
7210 
7211   /// Return the corresponding bits for a given map clause modifier. Add
7212   /// a flag marking the map as a pointer if requested. Add a flag marking the
7213   /// map as the first one of a series of maps that relate to the same map
7214   /// expression.
7215   OpenMPOffloadMappingFlags getMapTypeBits(
7216       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7217       bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
7218     OpenMPOffloadMappingFlags Bits =
7219         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7220     switch (MapType) {
7221     case OMPC_MAP_alloc:
7222     case OMPC_MAP_release:
7223       // alloc and release is the default behavior in the runtime library,  i.e.
7224       // if we don't pass any bits alloc/release that is what the runtime is
7225       // going to do. Therefore, we don't need to signal anything for these two
7226       // type modifiers.
7227       break;
7228     case OMPC_MAP_to:
7229       Bits |= OMP_MAP_TO;
7230       break;
7231     case OMPC_MAP_from:
7232       Bits |= OMP_MAP_FROM;
7233       break;
7234     case OMPC_MAP_tofrom:
7235       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7236       break;
7237     case OMPC_MAP_delete:
7238       Bits |= OMP_MAP_DELETE;
7239       break;
7240     case OMPC_MAP_unknown:
7241       llvm_unreachable("Unexpected map type!");
7242     }
7243     if (AddPtrFlag)
7244       Bits |= OMP_MAP_PTR_AND_OBJ;
7245     if (AddIsTargetParamFlag)
7246       Bits |= OMP_MAP_TARGET_PARAM;
7247     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7248         != MapModifiers.end())
7249       Bits |= OMP_MAP_ALWAYS;
7250     return Bits;
7251   }
7252 
7253   /// Return true if the provided expression is a final array section. A
7254   /// final array section, is one whose length can't be proved to be one.
7255   bool isFinalArraySectionExpression(const Expr *E) const {
7256     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7257 
7258     // It is not an array section and therefore not a unity-size one.
7259     if (!OASE)
7260       return false;
7261 
7262     // An array section with no colon always refer to a single element.
7263     if (OASE->getColonLoc().isInvalid())
7264       return false;
7265 
7266     const Expr *Length = OASE->getLength();
7267 
7268     // If we don't have a length we have to check if the array has size 1
7269     // for this dimension. Also, we should always expect a length if the
7270     // base type is pointer.
7271     if (!Length) {
7272       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7273                              OASE->getBase()->IgnoreParenImpCasts())
7274                              .getCanonicalType();
7275       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7276         return ATy->getSize().getSExtValue() != 1;
7277       // If we don't have a constant dimension length, we have to consider
7278       // the current section as having any size, so it is not necessarily
7279       // unitary. If it happen to be unity size, that's user fault.
7280       return true;
7281     }
7282 
7283     // Check if the length evaluates to 1.
7284     Expr::EvalResult Result;
7285     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7286       return true; // Can have more that size 1.
7287 
7288     llvm::APSInt ConstLength = Result.Val.getInt();
7289     return ConstLength.getSExtValue() != 1;
7290   }
7291 
7292   /// Generate the base pointers, section pointers, sizes and map type
7293   /// bits for the provided map type, map modifier, and expression components.
7294   /// \a IsFirstComponent should be set to true if the provided set of
7295   /// components is the first associated with a capture.
7296   void generateInfoForComponentList(
7297       OpenMPMapClauseKind MapType,
7298       ArrayRef<OpenMPMapModifierKind> MapModifiers,
7299       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7300       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
7301       MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
7302       StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
7303       bool IsImplicit,
7304       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7305           OverlappedElements = llvm::None) const {
7306     // The following summarizes what has to be generated for each map and the
7307     // types below. The generated information is expressed in this order:
7308     // base pointer, section pointer, size, flags
7309     // (to add to the ones that come from the map type and modifier).
7310     //
7311     // double d;
7312     // int i[100];
7313     // float *p;
7314     //
7315     // struct S1 {
7316     //   int i;
7317     //   float f[50];
7318     // }
7319     // struct S2 {
7320     //   int i;
7321     //   float f[50];
7322     //   S1 s;
7323     //   double *p;
7324     //   struct S2 *ps;
7325     // }
7326     // S2 s;
7327     // S2 *ps;
7328     //
7329     // map(d)
7330     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7331     //
7332     // map(i)
7333     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7334     //
7335     // map(i[1:23])
7336     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7337     //
7338     // map(p)
7339     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7340     //
7341     // map(p[1:24])
7342     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7343     //
7344     // map(s)
7345     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7346     //
7347     // map(s.i)
7348     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7349     //
7350     // map(s.s.f)
7351     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7352     //
7353     // map(s.p)
7354     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7355     //
7356     // map(to: s.p[:22])
7357     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7358     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7359     // &(s.p), &(s.p[0]), 22*sizeof(double),
7360     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7361     // (*) alloc space for struct members, only this is a target parameter
7362     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7363     //      optimizes this entry out, same in the examples below)
7364     // (***) map the pointee (map: to)
7365     //
7366     // map(s.ps)
7367     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7368     //
7369     // map(from: s.ps->s.i)
7370     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7371     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7372     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7373     //
7374     // map(to: s.ps->ps)
7375     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7376     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7377     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7378     //
7379     // map(s.ps->ps->ps)
7380     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7381     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7382     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7383     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7384     //
7385     // map(to: s.ps->ps->s.f[:22])
7386     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7387     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7388     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7389     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7390     //
7391     // map(ps)
7392     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7393     //
7394     // map(ps->i)
7395     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7396     //
7397     // map(ps->s.f)
7398     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7399     //
7400     // map(from: ps->p)
7401     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7402     //
7403     // map(to: ps->p[:22])
7404     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7405     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7406     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7407     //
7408     // map(ps->ps)
7409     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7410     //
7411     // map(from: ps->ps->s.i)
7412     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7413     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7414     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7415     //
7416     // map(from: ps->ps->ps)
7417     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7418     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7419     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7420     //
7421     // map(ps->ps->ps->ps)
7422     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7423     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7424     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7425     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7426     //
7427     // map(to: ps->ps->ps->s.f[:22])
7428     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7429     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7430     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7431     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7432     //
7433     // map(to: s.f[:22]) map(from: s.p[:33])
7434     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7435     //     sizeof(double*) (**), TARGET_PARAM
7436     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7437     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7438     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7439     // (*) allocate contiguous space needed to fit all mapped members even if
7440     //     we allocate space for members not mapped (in this example,
7441     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7442     //     them as well because they fall between &s.f[0] and &s.p)
7443     //
7444     // map(from: s.f[:22]) map(to: ps->p[:33])
7445     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7446     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7447     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7448     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7449     // (*) the struct this entry pertains to is the 2nd element in the list of
7450     //     arguments, hence MEMBER_OF(2)
7451     //
7452     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7453     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7454     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7455     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7456     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7457     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7458     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7459     // (*) the struct this entry pertains to is the 4th element in the list
7460     //     of arguments, hence MEMBER_OF(4)
7461 
7462     // Track if the map information being generated is the first for a capture.
7463     bool IsCaptureFirstInfo = IsFirstComponentList;
7464     // When the variable is on a declare target link or in a to clause with
7465     // unified memory, a reference is needed to hold the host/device address
7466     // of the variable.
7467     bool RequiresReference = false;
7468 
7469     // Scan the components from the base to the complete expression.
7470     auto CI = Components.rbegin();
7471     auto CE = Components.rend();
7472     auto I = CI;
7473 
7474     // Track if the map information being generated is the first for a list of
7475     // components.
7476     bool IsExpressionFirstInfo = true;
7477     Address BP = Address::invalid();
7478     const Expr *AssocExpr = I->getAssociatedExpression();
7479     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7480     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7481 
7482     if (isa<MemberExpr>(AssocExpr)) {
7483       // The base is the 'this' pointer. The content of the pointer is going
7484       // to be the base of the field being mapped.
7485       BP = CGF.LoadCXXThisAddress();
7486     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7487                (OASE &&
7488                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7489       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7490     } else {
7491       // The base is the reference to the variable.
7492       // BP = &Var.
7493       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7494       if (const auto *VD =
7495               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7496         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7497                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7498           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7499               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7500                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7501             RequiresReference = true;
7502             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7503           }
7504         }
7505       }
7506 
7507       // If the variable is a pointer and is being dereferenced (i.e. is not
7508       // the last component), the base has to be the pointer itself, not its
7509       // reference. References are ignored for mapping purposes.
7510       QualType Ty =
7511           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7512       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7513         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7514 
7515         // We do not need to generate individual map information for the
7516         // pointer, it can be associated with the combined storage.
7517         ++I;
7518       }
7519     }
7520 
7521     // Track whether a component of the list should be marked as MEMBER_OF some
7522     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7523     // in a component list should be marked as MEMBER_OF, all subsequent entries
7524     // do not belong to the base struct. E.g.
7525     // struct S2 s;
7526     // s.ps->ps->ps->f[:]
7527     //   (1) (2) (3) (4)
7528     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7529     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7530     // is the pointee of ps(2) which is not member of struct s, so it should not
7531     // be marked as such (it is still PTR_AND_OBJ).
7532     // The variable is initialized to false so that PTR_AND_OBJ entries which
7533     // are not struct members are not considered (e.g. array of pointers to
7534     // data).
7535     bool ShouldBeMemberOf = false;
7536 
7537     // Variable keeping track of whether or not we have encountered a component
7538     // in the component list which is a member expression. Useful when we have a
7539     // pointer or a final array section, in which case it is the previous
7540     // component in the list which tells us whether we have a member expression.
7541     // E.g. X.f[:]
7542     // While processing the final array section "[:]" it is "f" which tells us
7543     // whether we are dealing with a member of a declared struct.
7544     const MemberExpr *EncounteredME = nullptr;
7545 
7546     for (; I != CE; ++I) {
7547       // If the current component is member of a struct (parent struct) mark it.
7548       if (!EncounteredME) {
7549         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7550         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7551         // as MEMBER_OF the parent struct.
7552         if (EncounteredME)
7553           ShouldBeMemberOf = true;
7554       }
7555 
7556       auto Next = std::next(I);
7557 
7558       // We need to generate the addresses and sizes if this is the last
7559       // component, if the component is a pointer or if it is an array section
7560       // whose length can't be proved to be one. If this is a pointer, it
7561       // becomes the base address for the following components.
7562 
7563       // A final array section, is one whose length can't be proved to be one.
7564       bool IsFinalArraySection =
7565           isFinalArraySectionExpression(I->getAssociatedExpression());
7566 
7567       // Get information on whether the element is a pointer. Have to do a
7568       // special treatment for array sections given that they are built-in
7569       // types.
7570       const auto *OASE =
7571           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7572       bool IsPointer =
7573           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7574                        .getCanonicalType()
7575                        ->isAnyPointerType()) ||
7576           I->getAssociatedExpression()->getType()->isAnyPointerType();
7577 
7578       if (Next == CE || IsPointer || IsFinalArraySection) {
7579         // If this is not the last component, we expect the pointer to be
7580         // associated with an array expression or member expression.
7581         assert((Next == CE ||
7582                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7583                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7584                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
7585                "Unexpected expression");
7586 
7587         Address LB =
7588             CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress();
7589 
7590         // If this component is a pointer inside the base struct then we don't
7591         // need to create any entry for it - it will be combined with the object
7592         // it is pointing to into a single PTR_AND_OBJ entry.
7593         bool IsMemberPointer =
7594             IsPointer && EncounteredME &&
7595             (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
7596              EncounteredME);
7597         if (!OverlappedElements.empty()) {
7598           // Handle base element with the info for overlapped elements.
7599           assert(!PartialStruct.Base.isValid() && "The base element is set.");
7600           assert(Next == CE &&
7601                  "Expected last element for the overlapped elements.");
7602           assert(!IsPointer &&
7603                  "Unexpected base element with the pointer type.");
7604           // Mark the whole struct as the struct that requires allocation on the
7605           // device.
7606           PartialStruct.LowestElem = {0, LB};
7607           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7608               I->getAssociatedExpression()->getType());
7609           Address HB = CGF.Builder.CreateConstGEP(
7610               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
7611                                                               CGF.VoidPtrTy),
7612               TypeSize.getQuantity() - 1);
7613           PartialStruct.HighestElem = {
7614               std::numeric_limits<decltype(
7615                   PartialStruct.HighestElem.first)>::max(),
7616               HB};
7617           PartialStruct.Base = BP;
7618           // Emit data for non-overlapped data.
7619           OpenMPOffloadMappingFlags Flags =
7620               OMP_MAP_MEMBER_OF |
7621               getMapTypeBits(MapType, MapModifiers, IsImplicit,
7622                              /*AddPtrFlag=*/false,
7623                              /*AddIsTargetParamFlag=*/false);
7624           LB = BP;
7625           llvm::Value *Size = nullptr;
7626           // Do bitcopy of all non-overlapped structure elements.
7627           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7628                    Component : OverlappedElements) {
7629             Address ComponentLB = Address::invalid();
7630             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7631                  Component) {
7632               if (MC.getAssociatedDeclaration()) {
7633                 ComponentLB =
7634                     CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7635                         .getAddress();
7636                 Size = CGF.Builder.CreatePtrDiff(
7637                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
7638                     CGF.EmitCastToVoidPtr(LB.getPointer()));
7639                 break;
7640               }
7641             }
7642             BasePointers.push_back(BP.getPointer());
7643             Pointers.push_back(LB.getPointer());
7644             Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty,
7645                                                       /*isSigned=*/true));
7646             Types.push_back(Flags);
7647             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7648           }
7649           BasePointers.push_back(BP.getPointer());
7650           Pointers.push_back(LB.getPointer());
7651           Size = CGF.Builder.CreatePtrDiff(
7652               CGF.EmitCastToVoidPtr(
7653                   CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
7654               CGF.EmitCastToVoidPtr(LB.getPointer()));
7655           Sizes.push_back(
7656               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7657           Types.push_back(Flags);
7658           break;
7659         }
7660         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7661         if (!IsMemberPointer) {
7662           BasePointers.push_back(BP.getPointer());
7663           Pointers.push_back(LB.getPointer());
7664           Sizes.push_back(
7665               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7666 
7667           // We need to add a pointer flag for each map that comes from the
7668           // same expression except for the first one. We also need to signal
7669           // this map is the first one that relates with the current capture
7670           // (there is a set of entries for each capture).
7671           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7672               MapType, MapModifiers, IsImplicit,
7673               !IsExpressionFirstInfo || RequiresReference,
7674               IsCaptureFirstInfo && !RequiresReference);
7675 
7676           if (!IsExpressionFirstInfo) {
7677             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7678             // then we reset the TO/FROM/ALWAYS/DELETE flags.
7679             if (IsPointer)
7680               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
7681                          OMP_MAP_DELETE);
7682 
7683             if (ShouldBeMemberOf) {
7684               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7685               // should be later updated with the correct value of MEMBER_OF.
7686               Flags |= OMP_MAP_MEMBER_OF;
7687               // From now on, all subsequent PTR_AND_OBJ entries should not be
7688               // marked as MEMBER_OF.
7689               ShouldBeMemberOf = false;
7690             }
7691           }
7692 
7693           Types.push_back(Flags);
7694         }
7695 
7696         // If we have encountered a member expression so far, keep track of the
7697         // mapped member. If the parent is "*this", then the value declaration
7698         // is nullptr.
7699         if (EncounteredME) {
7700           const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl());
7701           unsigned FieldIndex = FD->getFieldIndex();
7702 
7703           // Update info about the lowest and highest elements for this struct
7704           if (!PartialStruct.Base.isValid()) {
7705             PartialStruct.LowestElem = {FieldIndex, LB};
7706             PartialStruct.HighestElem = {FieldIndex, LB};
7707             PartialStruct.Base = BP;
7708           } else if (FieldIndex < PartialStruct.LowestElem.first) {
7709             PartialStruct.LowestElem = {FieldIndex, LB};
7710           } else if (FieldIndex > PartialStruct.HighestElem.first) {
7711             PartialStruct.HighestElem = {FieldIndex, LB};
7712           }
7713         }
7714 
7715         // If we have a final array section, we are done with this expression.
7716         if (IsFinalArraySection)
7717           break;
7718 
7719         // The pointer becomes the base for the next element.
7720         if (Next != CE)
7721           BP = LB;
7722 
7723         IsExpressionFirstInfo = false;
7724         IsCaptureFirstInfo = false;
7725       }
7726     }
7727   }
7728 
7729   /// Return the adjusted map modifiers if the declaration a capture refers to
7730   /// appears in a first-private clause. This is expected to be used only with
7731   /// directives that start with 'target'.
7732   MappableExprsHandler::OpenMPOffloadMappingFlags
7733   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7734     assert(Cap.capturesVariable() && "Expected capture by reference only!");
7735 
7736     // A first private variable captured by reference will use only the
7737     // 'private ptr' and 'map to' flag. Return the right flags if the captured
7738     // declaration is known as first-private in this handler.
7739     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7740       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
7741           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
7742         return MappableExprsHandler::OMP_MAP_ALWAYS |
7743                MappableExprsHandler::OMP_MAP_TO;
7744       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7745         return MappableExprsHandler::OMP_MAP_TO |
7746                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
7747       return MappableExprsHandler::OMP_MAP_PRIVATE |
7748              MappableExprsHandler::OMP_MAP_TO;
7749     }
7750     return MappableExprsHandler::OMP_MAP_TO |
7751            MappableExprsHandler::OMP_MAP_FROM;
7752   }
7753 
7754   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
7755     // Member of is given by the 16 MSB of the flag, so rotate by 48 bits.
7756     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
7757                                                   << 48);
7758   }
7759 
7760   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
7761                                      OpenMPOffloadMappingFlags MemberOfFlag) {
7762     // If the entry is PTR_AND_OBJ but has not been marked with the special
7763     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
7764     // marked as MEMBER_OF.
7765     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
7766         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
7767       return;
7768 
7769     // Reset the placeholder value to prepare the flag for the assignment of the
7770     // proper MEMBER_OF value.
7771     Flags &= ~OMP_MAP_MEMBER_OF;
7772     Flags |= MemberOfFlag;
7773   }
7774 
7775   void getPlainLayout(const CXXRecordDecl *RD,
7776                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7777                       bool AsBase) const {
7778     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7779 
7780     llvm::StructType *St =
7781         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7782 
7783     unsigned NumElements = St->getNumElements();
7784     llvm::SmallVector<
7785         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7786         RecordLayout(NumElements);
7787 
7788     // Fill bases.
7789     for (const auto &I : RD->bases()) {
7790       if (I.isVirtual())
7791         continue;
7792       const auto *Base = I.getType()->getAsCXXRecordDecl();
7793       // Ignore empty bases.
7794       if (Base->isEmpty() || CGF.getContext()
7795                                  .getASTRecordLayout(Base)
7796                                  .getNonVirtualSize()
7797                                  .isZero())
7798         continue;
7799 
7800       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7801       RecordLayout[FieldIndex] = Base;
7802     }
7803     // Fill in virtual bases.
7804     for (const auto &I : RD->vbases()) {
7805       const auto *Base = I.getType()->getAsCXXRecordDecl();
7806       // Ignore empty bases.
7807       if (Base->isEmpty())
7808         continue;
7809       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7810       if (RecordLayout[FieldIndex])
7811         continue;
7812       RecordLayout[FieldIndex] = Base;
7813     }
7814     // Fill in all the fields.
7815     assert(!RD->isUnion() && "Unexpected union.");
7816     for (const auto *Field : RD->fields()) {
7817       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
7818       // will fill in later.)
7819       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
7820         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
7821         RecordLayout[FieldIndex] = Field;
7822       }
7823     }
7824     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
7825              &Data : RecordLayout) {
7826       if (Data.isNull())
7827         continue;
7828       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
7829         getPlainLayout(Base, Layout, /*AsBase=*/true);
7830       else
7831         Layout.push_back(Data.get<const FieldDecl *>());
7832     }
7833   }
7834 
7835 public:
7836   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
7837       : CurDir(Dir), CGF(CGF) {
7838     // Extract firstprivate clause information.
7839     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
7840       for (const auto *D : C->varlists())
7841         FirstPrivateDecls.try_emplace(
7842             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
7843     // Extract device pointer clause information.
7844     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
7845       for (auto L : C->component_lists())
7846         DevPointersMap[L.first].push_back(L.second);
7847   }
7848 
7849   /// Generate code for the combined entry if we have a partially mapped struct
7850   /// and take care of the mapping flags of the arguments corresponding to
7851   /// individual struct members.
7852   void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
7853                          MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7854                          MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
7855                          const StructRangeInfoTy &PartialStruct) const {
7856     // Base is the base of the struct
7857     BasePointers.push_back(PartialStruct.Base.getPointer());
7858     // Pointer is the address of the lowest element
7859     llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
7860     Pointers.push_back(LB);
7861     // Size is (addr of {highest+1} element) - (addr of lowest element)
7862     llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
7863     llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
7864     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
7865     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
7866     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
7867     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
7868                                                   /*isSinged=*/false);
7869     Sizes.push_back(Size);
7870     // Map type is always TARGET_PARAM
7871     Types.push_back(OMP_MAP_TARGET_PARAM);
7872     // Remove TARGET_PARAM flag from the first element
7873     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
7874 
7875     // All other current entries will be MEMBER_OF the combined entry
7876     // (except for PTR_AND_OBJ entries which do not have a placeholder value
7877     // 0xFFFF in the MEMBER_OF field).
7878     OpenMPOffloadMappingFlags MemberOfFlag =
7879         getMemberOfFlag(BasePointers.size() - 1);
7880     for (auto &M : CurTypes)
7881       setCorrectMemberOfFlag(M, MemberOfFlag);
7882   }
7883 
7884   /// Generate all the base pointers, section pointers, sizes and map
7885   /// types for the extracted mappable expressions. Also, for each item that
7886   /// relates with a device pointer, a pair of the relevant declaration and
7887   /// index where it occurs is appended to the device pointers info array.
7888   void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
7889                        MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7890                        MapFlagsArrayTy &Types) const {
7891     // We have to process the component lists that relate with the same
7892     // declaration in a single chunk so that we can generate the map flags
7893     // correctly. Therefore, we organize all lists in a map.
7894     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
7895 
7896     // Helper function to fill the information map for the different supported
7897     // clauses.
7898     auto &&InfoGen = [&Info](
7899         const ValueDecl *D,
7900         OMPClauseMappableExprCommon::MappableExprComponentListRef L,
7901         OpenMPMapClauseKind MapType,
7902         ArrayRef<OpenMPMapModifierKind> MapModifiers,
7903         bool ReturnDevicePointer, bool IsImplicit) {
7904       const ValueDecl *VD =
7905           D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
7906       Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
7907                             IsImplicit);
7908     };
7909 
7910     // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
7911     for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
7912       for (const auto &L : C->component_lists()) {
7913         InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
7914             /*ReturnDevicePointer=*/false, C->isImplicit());
7915       }
7916     for (const auto *C : this->CurDir.getClausesOfKind<OMPToClause>())
7917       for (const auto &L : C->component_lists()) {
7918         InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
7919             /*ReturnDevicePointer=*/false, C->isImplicit());
7920       }
7921     for (const auto *C : this->CurDir.getClausesOfKind<OMPFromClause>())
7922       for (const auto &L : C->component_lists()) {
7923         InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
7924             /*ReturnDevicePointer=*/false, C->isImplicit());
7925       }
7926 
7927     // Look at the use_device_ptr clause information and mark the existing map
7928     // entries as such. If there is no map information for an entry in the
7929     // use_device_ptr list, we create one with map type 'alloc' and zero size
7930     // section. It is the user fault if that was not mapped before. If there is
7931     // no map information and the pointer is a struct member, then we defer the
7932     // emission of that entry until the whole struct has been processed.
7933     llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
7934         DeferredInfo;
7935 
7936     // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
7937     for (const auto *C :
7938         this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>()) {
7939       for (const auto &L : C->component_lists()) {
7940         assert(!L.second.empty() && "Not expecting empty list of components!");
7941         const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
7942         VD = cast<ValueDecl>(VD->getCanonicalDecl());
7943         const Expr *IE = L.second.back().getAssociatedExpression();
7944         // If the first component is a member expression, we have to look into
7945         // 'this', which maps to null in the map of map information. Otherwise
7946         // look directly for the information.
7947         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
7948 
7949         // We potentially have map information for this declaration already.
7950         // Look for the first set of components that refer to it.
7951         if (It != Info.end()) {
7952           auto CI = std::find_if(
7953               It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
7954                 return MI.Components.back().getAssociatedDeclaration() == VD;
7955               });
7956           // If we found a map entry, signal that the pointer has to be returned
7957           // and move on to the next declaration.
7958           if (CI != It->second.end()) {
7959             CI->ReturnDevicePointer = true;
7960             continue;
7961           }
7962         }
7963 
7964         // We didn't find any match in our map information - generate a zero
7965         // size array section - if the pointer is a struct member we defer this
7966         // action until the whole struct has been processed.
7967         // FIXME: MSVC 2013 seems to require this-> to find member CGF.
7968         if (isa<MemberExpr>(IE)) {
7969           // Insert the pointer into Info to be processed by
7970           // generateInfoForComponentList. Because it is a member pointer
7971           // without a pointee, no entry will be generated for it, therefore
7972           // we need to generate one after the whole struct has been processed.
7973           // Nonetheless, generateInfoForComponentList must be called to take
7974           // the pointer into account for the calculation of the range of the
7975           // partial struct.
7976           InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
7977                   /*ReturnDevicePointer=*/false, C->isImplicit());
7978           DeferredInfo[nullptr].emplace_back(IE, VD);
7979         } else {
7980           llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
7981               this->CGF.EmitLValue(IE), IE->getExprLoc());
7982           BasePointers.emplace_back(Ptr, VD);
7983           Pointers.push_back(Ptr);
7984           Sizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
7985           Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
7986         }
7987       }
7988     }
7989 
7990     for (const auto &M : Info) {
7991       // We need to know when we generate information for the first component
7992       // associated with a capture, because the mapping flags depend on it.
7993       bool IsFirstComponentList = true;
7994 
7995       // Temporary versions of arrays
7996       MapBaseValuesArrayTy CurBasePointers;
7997       MapValuesArrayTy CurPointers;
7998       MapValuesArrayTy CurSizes;
7999       MapFlagsArrayTy CurTypes;
8000       StructRangeInfoTy PartialStruct;
8001 
8002       for (const MapInfo &L : M.second) {
8003         assert(!L.Components.empty() &&
8004                "Not expecting declaration with no component lists.");
8005 
8006         // Remember the current base pointer index.
8007         unsigned CurrentBasePointersIdx = CurBasePointers.size();
8008         // FIXME: MSVC 2013 seems to require this-> to find the member method.
8009         this->generateInfoForComponentList(
8010             L.MapType, L.MapModifiers, L.Components, CurBasePointers,
8011             CurPointers, CurSizes, CurTypes, PartialStruct,
8012             IsFirstComponentList, L.IsImplicit);
8013 
8014         // If this entry relates with a device pointer, set the relevant
8015         // declaration and add the 'return pointer' flag.
8016         if (L.ReturnDevicePointer) {
8017           assert(CurBasePointers.size() > CurrentBasePointersIdx &&
8018                  "Unexpected number of mapped base pointers.");
8019 
8020           const ValueDecl *RelevantVD =
8021               L.Components.back().getAssociatedDeclaration();
8022           assert(RelevantVD &&
8023                  "No relevant declaration related with device pointer??");
8024 
8025           CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
8026           CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8027         }
8028         IsFirstComponentList = false;
8029       }
8030 
8031       // Append any pending zero-length pointers which are struct members and
8032       // used with use_device_ptr.
8033       auto CI = DeferredInfo.find(M.first);
8034       if (CI != DeferredInfo.end()) {
8035         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8036           llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer();
8037           llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
8038               this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
8039           CurBasePointers.emplace_back(BasePtr, L.VD);
8040           CurPointers.push_back(Ptr);
8041           CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
8042           // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
8043           // value MEMBER_OF=FFFF so that the entry is later updated with the
8044           // correct value of MEMBER_OF.
8045           CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8046                              OMP_MAP_MEMBER_OF);
8047         }
8048       }
8049 
8050       // If there is an entry in PartialStruct it means we have a struct with
8051       // individual members mapped. Emit an extra combined entry.
8052       if (PartialStruct.Base.isValid())
8053         emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8054                           PartialStruct);
8055 
8056       // We need to append the results of this capture to what we already have.
8057       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8058       Pointers.append(CurPointers.begin(), CurPointers.end());
8059       Sizes.append(CurSizes.begin(), CurSizes.end());
8060       Types.append(CurTypes.begin(), CurTypes.end());
8061     }
8062   }
8063 
8064   /// Emit capture info for lambdas for variables captured by reference.
8065   void generateInfoForLambdaCaptures(
8066       const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
8067       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
8068       MapFlagsArrayTy &Types,
8069       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8070     const auto *RD = VD->getType()
8071                          .getCanonicalType()
8072                          .getNonReferenceType()
8073                          ->getAsCXXRecordDecl();
8074     if (!RD || !RD->isLambda())
8075       return;
8076     Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
8077     LValue VDLVal = CGF.MakeAddrLValue(
8078         VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
8079     llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
8080     FieldDecl *ThisCapture = nullptr;
8081     RD->getCaptureFields(Captures, ThisCapture);
8082     if (ThisCapture) {
8083       LValue ThisLVal =
8084           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8085       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8086       LambdaPointers.try_emplace(ThisLVal.getPointer(), VDLVal.getPointer());
8087       BasePointers.push_back(ThisLVal.getPointer());
8088       Pointers.push_back(ThisLValVal.getPointer());
8089       Sizes.push_back(
8090           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8091                                     CGF.Int64Ty, /*isSigned=*/true));
8092       Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8093                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8094     }
8095     for (const LambdaCapture &LC : RD->captures()) {
8096       if (LC.getCaptureKind() != LCK_ByRef)
8097         continue;
8098       const VarDecl *VD = LC.getCapturedVar();
8099       auto It = Captures.find(VD);
8100       assert(It != Captures.end() && "Found lambda capture without field.");
8101       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8102       LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8103       LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer());
8104       BasePointers.push_back(VarLVal.getPointer());
8105       Pointers.push_back(VarLValVal.getPointer());
8106       Sizes.push_back(CGF.Builder.CreateIntCast(
8107           CGF.getTypeSize(
8108               VD->getType().getCanonicalType().getNonReferenceType()),
8109           CGF.Int64Ty, /*isSigned=*/true));
8110       Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8111                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8112     }
8113   }
8114 
8115   /// Set correct indices for lambdas captures.
8116   void adjustMemberOfForLambdaCaptures(
8117       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8118       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8119       MapFlagsArrayTy &Types) const {
8120     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8121       // Set correct member_of idx for all implicit lambda captures.
8122       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8123                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8124         continue;
8125       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8126       assert(BasePtr && "Unable to find base lambda address.");
8127       int TgtIdx = -1;
8128       for (unsigned J = I; J > 0; --J) {
8129         unsigned Idx = J - 1;
8130         if (Pointers[Idx] != BasePtr)
8131           continue;
8132         TgtIdx = Idx;
8133         break;
8134       }
8135       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8136       // All other current entries will be MEMBER_OF the combined entry
8137       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8138       // 0xFFFF in the MEMBER_OF field).
8139       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8140       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8141     }
8142   }
8143 
8144   /// Generate the base pointers, section pointers, sizes and map types
8145   /// associated to a given capture.
8146   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
8147                               llvm::Value *Arg,
8148                               MapBaseValuesArrayTy &BasePointers,
8149                               MapValuesArrayTy &Pointers,
8150                               MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
8151                               StructRangeInfoTy &PartialStruct) const {
8152     assert(!Cap->capturesVariableArrayType() &&
8153            "Not expecting to generate map info for a variable array type!");
8154 
8155     // We need to know when we generating information for the first component
8156     const ValueDecl *VD = Cap->capturesThis()
8157                               ? nullptr
8158                               : Cap->getCapturedVar()->getCanonicalDecl();
8159 
8160     // If this declaration appears in a is_device_ptr clause we just have to
8161     // pass the pointer by value. If it is a reference to a declaration, we just
8162     // pass its value.
8163     if (DevPointersMap.count(VD)) {
8164       BasePointers.emplace_back(Arg, VD);
8165       Pointers.push_back(Arg);
8166       Sizes.push_back(
8167           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8168                                     CGF.Int64Ty, /*isSigned=*/true));
8169       Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
8170       return;
8171     }
8172 
8173     using MapData =
8174         std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
8175                    OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
8176     SmallVector<MapData, 4> DeclComponentLists;
8177     // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
8178     for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) {
8179       for (const auto &L : C->decl_component_lists(VD)) {
8180         assert(L.first == VD &&
8181                "We got information for the wrong declaration??");
8182         assert(!L.second.empty() &&
8183                "Not expecting declaration with no component lists.");
8184         DeclComponentLists.emplace_back(L.second, C->getMapType(),
8185                                         C->getMapTypeModifiers(),
8186                                         C->isImplicit());
8187       }
8188     }
8189 
8190     // Find overlapping elements (including the offset from the base element).
8191     llvm::SmallDenseMap<
8192         const MapData *,
8193         llvm::SmallVector<
8194             OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
8195         4>
8196         OverlappedData;
8197     size_t Count = 0;
8198     for (const MapData &L : DeclComponentLists) {
8199       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8200       OpenMPMapClauseKind MapType;
8201       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8202       bool IsImplicit;
8203       std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
8204       ++Count;
8205       for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
8206         OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
8207         std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
8208         auto CI = Components.rbegin();
8209         auto CE = Components.rend();
8210         auto SI = Components1.rbegin();
8211         auto SE = Components1.rend();
8212         for (; CI != CE && SI != SE; ++CI, ++SI) {
8213           if (CI->getAssociatedExpression()->getStmtClass() !=
8214               SI->getAssociatedExpression()->getStmtClass())
8215             break;
8216           // Are we dealing with different variables/fields?
8217           if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
8218             break;
8219         }
8220         // Found overlapping if, at least for one component, reached the head of
8221         // the components list.
8222         if (CI == CE || SI == SE) {
8223           assert((CI != CE || SI != SE) &&
8224                  "Unexpected full match of the mapping components.");
8225           const MapData &BaseData = CI == CE ? L : L1;
8226           OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
8227               SI == SE ? Components : Components1;
8228           auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
8229           OverlappedElements.getSecond().push_back(SubData);
8230         }
8231       }
8232     }
8233     // Sort the overlapped elements for each item.
8234     llvm::SmallVector<const FieldDecl *, 4> Layout;
8235     if (!OverlappedData.empty()) {
8236       if (const auto *CRD =
8237               VD->getType().getCanonicalType()->getAsCXXRecordDecl())
8238         getPlainLayout(CRD, Layout, /*AsBase=*/false);
8239       else {
8240         const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
8241         Layout.append(RD->field_begin(), RD->field_end());
8242       }
8243     }
8244     for (auto &Pair : OverlappedData) {
8245       llvm::sort(
8246           Pair.getSecond(),
8247           [&Layout](
8248               OMPClauseMappableExprCommon::MappableExprComponentListRef First,
8249               OMPClauseMappableExprCommon::MappableExprComponentListRef
8250                   Second) {
8251             auto CI = First.rbegin();
8252             auto CE = First.rend();
8253             auto SI = Second.rbegin();
8254             auto SE = Second.rend();
8255             for (; CI != CE && SI != SE; ++CI, ++SI) {
8256               if (CI->getAssociatedExpression()->getStmtClass() !=
8257                   SI->getAssociatedExpression()->getStmtClass())
8258                 break;
8259               // Are we dealing with different variables/fields?
8260               if (CI->getAssociatedDeclaration() !=
8261                   SI->getAssociatedDeclaration())
8262                 break;
8263             }
8264 
8265             // Lists contain the same elements.
8266             if (CI == CE && SI == SE)
8267               return false;
8268 
8269             // List with less elements is less than list with more elements.
8270             if (CI == CE || SI == SE)
8271               return CI == CE;
8272 
8273             const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
8274             const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
8275             if (FD1->getParent() == FD2->getParent())
8276               return FD1->getFieldIndex() < FD2->getFieldIndex();
8277             const auto It =
8278                 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
8279                   return FD == FD1 || FD == FD2;
8280                 });
8281             return *It == FD1;
8282           });
8283     }
8284 
8285     // Associated with a capture, because the mapping flags depend on it.
8286     // Go through all of the elements with the overlapped elements.
8287     for (const auto &Pair : OverlappedData) {
8288       const MapData &L = *Pair.getFirst();
8289       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8290       OpenMPMapClauseKind MapType;
8291       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8292       bool IsImplicit;
8293       std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
8294       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
8295           OverlappedComponents = Pair.getSecond();
8296       bool IsFirstComponentList = true;
8297       generateInfoForComponentList(MapType, MapModifiers, Components,
8298                                    BasePointers, Pointers, Sizes, Types,
8299                                    PartialStruct, IsFirstComponentList,
8300                                    IsImplicit, OverlappedComponents);
8301     }
8302     // Go through other elements without overlapped elements.
8303     bool IsFirstComponentList = OverlappedData.empty();
8304     for (const MapData &L : DeclComponentLists) {
8305       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8306       OpenMPMapClauseKind MapType;
8307       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8308       bool IsImplicit;
8309       std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
8310       auto It = OverlappedData.find(&L);
8311       if (It == OverlappedData.end())
8312         generateInfoForComponentList(MapType, MapModifiers, Components,
8313                                      BasePointers, Pointers, Sizes, Types,
8314                                      PartialStruct, IsFirstComponentList,
8315                                      IsImplicit);
8316       IsFirstComponentList = false;
8317     }
8318   }
8319 
8320   /// Generate the base pointers, section pointers, sizes and map types
8321   /// associated with the declare target link variables.
8322   void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
8323                                         MapValuesArrayTy &Pointers,
8324                                         MapValuesArrayTy &Sizes,
8325                                         MapFlagsArrayTy &Types) const {
8326     // Map other list items in the map clause which are not captured variables
8327     // but "declare target link" global variables.
8328     for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) {
8329       for (const auto &L : C->component_lists()) {
8330         if (!L.first)
8331           continue;
8332         const auto *VD = dyn_cast<VarDecl>(L.first);
8333         if (!VD)
8334           continue;
8335         llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
8336             OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
8337         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8338             !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
8339           continue;
8340         StructRangeInfoTy PartialStruct;
8341         generateInfoForComponentList(
8342             C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
8343             Pointers, Sizes, Types, PartialStruct,
8344             /*IsFirstComponentList=*/true, C->isImplicit());
8345         assert(!PartialStruct.Base.isValid() &&
8346                "No partial structs for declare target link expected.");
8347       }
8348     }
8349   }
8350 
8351   /// Generate the default map information for a given capture \a CI,
8352   /// record field declaration \a RI and captured value \a CV.
8353   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
8354                               const FieldDecl &RI, llvm::Value *CV,
8355                               MapBaseValuesArrayTy &CurBasePointers,
8356                               MapValuesArrayTy &CurPointers,
8357                               MapValuesArrayTy &CurSizes,
8358                               MapFlagsArrayTy &CurMapTypes) const {
8359     bool IsImplicit = true;
8360     // Do the default mapping.
8361     if (CI.capturesThis()) {
8362       CurBasePointers.push_back(CV);
8363       CurPointers.push_back(CV);
8364       const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
8365       CurSizes.push_back(
8366           CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
8367                                     CGF.Int64Ty, /*isSigned=*/true));
8368       // Default map type.
8369       CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
8370     } else if (CI.capturesVariableByCopy()) {
8371       CurBasePointers.push_back(CV);
8372       CurPointers.push_back(CV);
8373       if (!RI.getType()->isAnyPointerType()) {
8374         // We have to signal to the runtime captures passed by value that are
8375         // not pointers.
8376         CurMapTypes.push_back(OMP_MAP_LITERAL);
8377         CurSizes.push_back(CGF.Builder.CreateIntCast(
8378             CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
8379       } else {
8380         // Pointers are implicitly mapped with a zero size and no flags
8381         // (other than first map that is added for all implicit maps).
8382         CurMapTypes.push_back(OMP_MAP_NONE);
8383         CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8384       }
8385       const VarDecl *VD = CI.getCapturedVar();
8386       auto I = FirstPrivateDecls.find(VD);
8387       if (I != FirstPrivateDecls.end())
8388         IsImplicit = I->getSecond();
8389     } else {
8390       assert(CI.capturesVariable() && "Expected captured reference.");
8391       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
8392       QualType ElementType = PtrTy->getPointeeType();
8393       CurSizes.push_back(CGF.Builder.CreateIntCast(
8394           CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
8395       // The default map type for a scalar/complex type is 'to' because by
8396       // default the value doesn't have to be retrieved. For an aggregate
8397       // type, the default is 'tofrom'.
8398       CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
8399       const VarDecl *VD = CI.getCapturedVar();
8400       auto I = FirstPrivateDecls.find(VD);
8401       if (I != FirstPrivateDecls.end() &&
8402           VD->getType().isConstant(CGF.getContext())) {
8403         llvm::Constant *Addr =
8404             CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
8405         // Copy the value of the original variable to the new global copy.
8406         CGF.Builder.CreateMemCpy(
8407             CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(),
8408             Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
8409             CurSizes.back(), /*isVolatile=*/false);
8410         // Use new global variable as the base pointers.
8411         CurBasePointers.push_back(Addr);
8412         CurPointers.push_back(Addr);
8413       } else {
8414         CurBasePointers.push_back(CV);
8415         if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
8416           Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
8417               CV, ElementType, CGF.getContext().getDeclAlign(VD),
8418               AlignmentSource::Decl));
8419           CurPointers.push_back(PtrAddr.getPointer());
8420         } else {
8421           CurPointers.push_back(CV);
8422         }
8423       }
8424       if (I != FirstPrivateDecls.end())
8425         IsImplicit = I->getSecond();
8426     }
8427     // Every default map produces a single argument which is a target parameter.
8428     CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;
8429 
8430     // Add flag stating this is an implicit map.
8431     if (IsImplicit)
8432       CurMapTypes.back() |= OMP_MAP_IMPLICIT;
8433   }
8434 };
8435 } // anonymous namespace
8436 
8437 /// Emit the arrays used to pass the captures and map information to the
8438 /// offloading runtime library. If there is no map or capture information,
8439 /// return nullptr by reference.
8440 static void
8441 emitOffloadingArrays(CodeGenFunction &CGF,
8442                      MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
8443                      MappableExprsHandler::MapValuesArrayTy &Pointers,
8444                      MappableExprsHandler::MapValuesArrayTy &Sizes,
8445                      MappableExprsHandler::MapFlagsArrayTy &MapTypes,
8446                      CGOpenMPRuntime::TargetDataInfo &Info) {
8447   CodeGenModule &CGM = CGF.CGM;
8448   ASTContext &Ctx = CGF.getContext();
8449 
8450   // Reset the array information.
8451   Info.clearArrayInfo();
8452   Info.NumberOfPtrs = BasePointers.size();
8453 
8454   if (Info.NumberOfPtrs) {
8455     // Detect if we have any capture size requiring runtime evaluation of the
8456     // size so that a constant array could be eventually used.
8457     bool hasRuntimeEvaluationCaptureSize = false;
8458     for (llvm::Value *S : Sizes)
8459       if (!isa<llvm::Constant>(S)) {
8460         hasRuntimeEvaluationCaptureSize = true;
8461         break;
8462       }
8463 
8464     llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
8465     QualType PointerArrayType =
8466         Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
8467                                  /*IndexTypeQuals=*/0);
8468 
8469     Info.BasePointersArray =
8470         CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
8471     Info.PointersArray =
8472         CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
8473 
8474     // If we don't have any VLA types or other types that require runtime
8475     // evaluation, we can use a constant array for the map sizes, otherwise we
8476     // need to fill up the arrays as we do for the pointers.
8477     QualType Int64Ty =
8478         Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
8479     if (hasRuntimeEvaluationCaptureSize) {
8480       QualType SizeArrayType =
8481           Ctx.getConstantArrayType(Int64Ty, PointerNumAP, ArrayType::Normal,
8482                                    /*IndexTypeQuals=*/0);
8483       Info.SizesArray =
8484           CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
8485     } else {
8486       // We expect all the sizes to be constant, so we collect them to create
8487       // a constant array.
8488       SmallVector<llvm::Constant *, 16> ConstSizes;
8489       for (llvm::Value *S : Sizes)
8490         ConstSizes.push_back(cast<llvm::Constant>(S));
8491 
8492       auto *SizesArrayInit = llvm::ConstantArray::get(
8493           llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
8494       std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
8495       auto *SizesArrayGbl = new llvm::GlobalVariable(
8496           CGM.getModule(), SizesArrayInit->getType(),
8497           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8498           SizesArrayInit, Name);
8499       SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8500       Info.SizesArray = SizesArrayGbl;
8501     }
8502 
8503     // The map types are always constant so we don't need to generate code to
8504     // fill arrays. Instead, we create an array constant.
8505     SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
8506     llvm::copy(MapTypes, Mapping.begin());
8507     llvm::Constant *MapTypesArrayInit =
8508         llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
8509     std::string MaptypesName =
8510         CGM.getOpenMPRuntime().getName({"offload_maptypes"});
8511     auto *MapTypesArrayGbl = new llvm::GlobalVariable(
8512         CGM.getModule(), MapTypesArrayInit->getType(),
8513         /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8514         MapTypesArrayInit, MaptypesName);
8515     MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8516     Info.MapTypesArray = MapTypesArrayGbl;
8517 
8518     for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
8519       llvm::Value *BPVal = *BasePointers[I];
8520       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
8521           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8522           Info.BasePointersArray, 0, I);
8523       BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8524           BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
8525       Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8526       CGF.Builder.CreateStore(BPVal, BPAddr);
8527 
8528       if (Info.requiresDevicePointerInfo())
8529         if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
8530           Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
8531 
8532       llvm::Value *PVal = Pointers[I];
8533       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
8534           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8535           Info.PointersArray, 0, I);
8536       P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8537           P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
8538       Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8539       CGF.Builder.CreateStore(PVal, PAddr);
8540 
8541       if (hasRuntimeEvaluationCaptureSize) {
8542         llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
8543             llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8544             Info.SizesArray,
8545             /*Idx0=*/0,
8546             /*Idx1=*/I);
8547         Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
8548         CGF.Builder.CreateStore(
8549             CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true),
8550             SAddr);
8551       }
8552     }
8553   }
8554 }
8555 /// Emit the arguments to be passed to the runtime library based on the
8556 /// arrays of pointers, sizes and map types.
8557 static void emitOffloadingArraysArgument(
8558     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
8559     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
8560     llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
8561   CodeGenModule &CGM = CGF.CGM;
8562   if (Info.NumberOfPtrs) {
8563     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8564         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8565         Info.BasePointersArray,
8566         /*Idx0=*/0, /*Idx1=*/0);
8567     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8568         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8569         Info.PointersArray,
8570         /*Idx0=*/0,
8571         /*Idx1=*/0);
8572     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8573         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
8574         /*Idx0=*/0, /*Idx1=*/0);
8575     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8576         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8577         Info.MapTypesArray,
8578         /*Idx0=*/0,
8579         /*Idx1=*/0);
8580   } else {
8581     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8582     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8583     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8584     MapTypesArrayArg =
8585         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8586   }
8587 }
8588 
8589 /// Check for inner distribute directive.
8590 static const OMPExecutableDirective *
8591 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
8592   const auto *CS = D.getInnermostCapturedStmt();
8593   const auto *Body =
8594       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
8595   const Stmt *ChildStmt =
8596       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8597 
8598   if (const auto *NestedDir =
8599           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8600     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
8601     switch (D.getDirectiveKind()) {
8602     case OMPD_target:
8603       if (isOpenMPDistributeDirective(DKind))
8604         return NestedDir;
8605       if (DKind == OMPD_teams) {
8606         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
8607             /*IgnoreCaptured=*/true);
8608         if (!Body)
8609           return nullptr;
8610         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8611         if (const auto *NND =
8612                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8613           DKind = NND->getDirectiveKind();
8614           if (isOpenMPDistributeDirective(DKind))
8615             return NND;
8616         }
8617       }
8618       return nullptr;
8619     case OMPD_target_teams:
8620       if (isOpenMPDistributeDirective(DKind))
8621         return NestedDir;
8622       return nullptr;
8623     case OMPD_target_parallel:
8624     case OMPD_target_simd:
8625     case OMPD_target_parallel_for:
8626     case OMPD_target_parallel_for_simd:
8627       return nullptr;
8628     case OMPD_target_teams_distribute:
8629     case OMPD_target_teams_distribute_simd:
8630     case OMPD_target_teams_distribute_parallel_for:
8631     case OMPD_target_teams_distribute_parallel_for_simd:
8632     case OMPD_parallel:
8633     case OMPD_for:
8634     case OMPD_parallel_for:
8635     case OMPD_parallel_sections:
8636     case OMPD_for_simd:
8637     case OMPD_parallel_for_simd:
8638     case OMPD_cancel:
8639     case OMPD_cancellation_point:
8640     case OMPD_ordered:
8641     case OMPD_threadprivate:
8642     case OMPD_allocate:
8643     case OMPD_task:
8644     case OMPD_simd:
8645     case OMPD_sections:
8646     case OMPD_section:
8647     case OMPD_single:
8648     case OMPD_master:
8649     case OMPD_critical:
8650     case OMPD_taskyield:
8651     case OMPD_barrier:
8652     case OMPD_taskwait:
8653     case OMPD_taskgroup:
8654     case OMPD_atomic:
8655     case OMPD_flush:
8656     case OMPD_teams:
8657     case OMPD_target_data:
8658     case OMPD_target_exit_data:
8659     case OMPD_target_enter_data:
8660     case OMPD_distribute:
8661     case OMPD_distribute_simd:
8662     case OMPD_distribute_parallel_for:
8663     case OMPD_distribute_parallel_for_simd:
8664     case OMPD_teams_distribute:
8665     case OMPD_teams_distribute_simd:
8666     case OMPD_teams_distribute_parallel_for:
8667     case OMPD_teams_distribute_parallel_for_simd:
8668     case OMPD_target_update:
8669     case OMPD_declare_simd:
8670     case OMPD_declare_target:
8671     case OMPD_end_declare_target:
8672     case OMPD_declare_reduction:
8673     case OMPD_declare_mapper:
8674     case OMPD_taskloop:
8675     case OMPD_taskloop_simd:
8676     case OMPD_requires:
8677     case OMPD_unknown:
8678       llvm_unreachable("Unexpected directive.");
8679     }
8680   }
8681 
8682   return nullptr;
8683 }
8684 
8685 void CGOpenMPRuntime::emitTargetNumIterationsCall(
8686     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device,
8687     const llvm::function_ref<llvm::Value *(
8688         CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter) {
8689   OpenMPDirectiveKind Kind = D.getDirectiveKind();
8690   const OMPExecutableDirective *TD = &D;
8691   // Get nested teams distribute kind directive, if any.
8692   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
8693     TD = getNestedDistributeDirective(CGM.getContext(), D);
8694   if (!TD)
8695     return;
8696   const auto *LD = cast<OMPLoopDirective>(TD);
8697   auto &&CodeGen = [LD, &Device, &SizeEmitter, this](CodeGenFunction &CGF,
8698                                                      PrePostActionTy &) {
8699     llvm::Value *NumIterations = SizeEmitter(CGF, *LD);
8700 
8701     // Emit device ID if any.
8702     llvm::Value *DeviceID;
8703     if (Device)
8704       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
8705                                            CGF.Int64Ty, /*isSigned=*/true);
8706     else
8707       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
8708 
8709     llvm::Value *Args[] = {DeviceID, NumIterations};
8710     CGF.EmitRuntimeCall(
8711         createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
8712   };
8713   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
8714 }
8715 
/// Emit the host-side code that launches the target region of \p D.
///
/// \param CGF Function in which the code is emitted; nothing is emitted when
///            it has no insertion point.
/// \param D The target directive being emitted.
/// \param OutlinedFn Host version of the region; must be non-null. It is
///                   called when offloading is not attempted, or when the
///                   offloading runtime call returns a non-null result.
/// \param OutlinedFnID Identifier of the target region passed to the runtime.
///                     When null, only the host version is called.
/// \param IfCond Optional 'if' clause condition; when present it is handed to
///               emitOMPIfClause together with the offloading generator and
///               the host-only generator.
/// \param Device Optional device-id expression; OMP_DEVICEID_UNDEF is passed
///               to the runtime when it is null.
void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     llvm::Function *OutlinedFn,
                                     llvm::Value *OutlinedFnID,
                                     const Expr *IfCond, const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // A 'depend' clause means the region is emitted through the task-based
  // path (EmitOMPTargetTaskBasedDirective) below.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  // Collect the values of the captured variables once, up front.
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // InputInfo and MapTypesArray are filled in by TargetThenGen and consumed by
  // ThenGen; both lambdas capture them by reference.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask,
                    &CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) {
    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // A 'nowait' clause selects the *_nowait variants of the runtime entries.
    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
                                          : OMPRTL__tgt_target_teams),
          OffloadingArgs);
    } else {
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
                                          : OMPRTL__tgt_target),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    // Any non-null return value is treated as an offloading failure.
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    // On the task-based path the captured variables are regenerated with the
    // CodeGenFunction active here before calling the host version.
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Offloading path: build the map information for every capture, emit the
  // offloading arrays, then run ThenGen (directly or through an outer task).
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;

    // Walk the captures, the fields of the captured record and the captured
    // values in lockstep.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
      MappableExprsHandler::MapValuesArrayTy CurPointers;
      MappableExprsHandler::MapValuesArrayTy CurSizes;
      MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurBasePointers.push_back(*CV);
        CurPointers.push_back(*CV);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                              MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                              MappableExprsHandler::OMP_MAP_IMPLICIT);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
                                         CurSizes, CurMapTypes, PartialStruct);
        if (CurBasePointers.empty())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
                                           CurPointers, CurSizes, CurMapTypes);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(
              CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
              CurMapTypes, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert(!CurBasePointers.empty() &&
             "Non-existing map pointer for capture!");
      assert(CurBasePointers.size() == CurPointers.size() &&
             CurBasePointers.size() == CurSizes.size() &&
             CurBasePointers.size() == CurMapTypes.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
                                    CurMapTypes, PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
                                              Pointers, MapTypes);
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
                                               MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    // The fields of Info serve both as the source arrays and as the outputs:
    // after this call they hold the argument values for the runtime call.
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Publish the results to the state shared with ThenGen.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // Host-only path: run ElseGen directly or through an outer task, using an
  // empty OMPTargetDataInfo in the latter case.
  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user does not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
8988 
8989 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
8990                                                     StringRef ParentName) {
8991   if (!S)
8992     return;
8993 
8994   // Codegen OMP target directives that offload compute to the device.
8995   bool RequiresDeviceCodegen =
8996       isa<OMPExecutableDirective>(S) &&
8997       isOpenMPTargetExecutionDirective(
8998           cast<OMPExecutableDirective>(S)->getDirectiveKind());
8999 
9000   if (RequiresDeviceCodegen) {
9001     const auto &E = *cast<OMPExecutableDirective>(S);
9002     unsigned DeviceID;
9003     unsigned FileID;
9004     unsigned Line;
9005     getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
9006                              FileID, Line);
9007 
9008     // Is this a target region that should not be emitted as an entry point? If
9009     // so just signal we are done with this target region.
9010     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
9011                                                             ParentName, Line))
9012       return;
9013 
9014     switch (E.getDirectiveKind()) {
9015     case OMPD_target:
9016       CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
9017                                                    cast<OMPTargetDirective>(E));
9018       break;
9019     case OMPD_target_parallel:
9020       CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
9021           CGM, ParentName, cast<OMPTargetParallelDirective>(E));
9022       break;
9023     case OMPD_target_teams:
9024       CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
9025           CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
9026       break;
9027     case OMPD_target_teams_distribute:
9028       CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
9029           CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
9030       break;
9031     case OMPD_target_teams_distribute_simd:
9032       CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
9033           CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
9034       break;
9035     case OMPD_target_parallel_for:
9036       CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
9037           CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
9038       break;
9039     case OMPD_target_parallel_for_simd:
9040       CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
9041           CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
9042       break;
9043     case OMPD_target_simd:
9044       CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
9045           CGM, ParentName, cast<OMPTargetSimdDirective>(E));
9046       break;
9047     case OMPD_target_teams_distribute_parallel_for:
9048       CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
9049           CGM, ParentName,
9050           cast<OMPTargetTeamsDistributeParallelForDirective>(E));
9051       break;
9052     case OMPD_target_teams_distribute_parallel_for_simd:
9053       CodeGenFunction::
9054           EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
9055               CGM, ParentName,
9056               cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
9057       break;
9058     case OMPD_parallel:
9059     case OMPD_for:
9060     case OMPD_parallel_for:
9061     case OMPD_parallel_sections:
9062     case OMPD_for_simd:
9063     case OMPD_parallel_for_simd:
9064     case OMPD_cancel:
9065     case OMPD_cancellation_point:
9066     case OMPD_ordered:
9067     case OMPD_threadprivate:
9068     case OMPD_allocate:
9069     case OMPD_task:
9070     case OMPD_simd:
9071     case OMPD_sections:
9072     case OMPD_section:
9073     case OMPD_single:
9074     case OMPD_master:
9075     case OMPD_critical:
9076     case OMPD_taskyield:
9077     case OMPD_barrier:
9078     case OMPD_taskwait:
9079     case OMPD_taskgroup:
9080     case OMPD_atomic:
9081     case OMPD_flush:
9082     case OMPD_teams:
9083     case OMPD_target_data:
9084     case OMPD_target_exit_data:
9085     case OMPD_target_enter_data:
9086     case OMPD_distribute:
9087     case OMPD_distribute_simd:
9088     case OMPD_distribute_parallel_for:
9089     case OMPD_distribute_parallel_for_simd:
9090     case OMPD_teams_distribute:
9091     case OMPD_teams_distribute_simd:
9092     case OMPD_teams_distribute_parallel_for:
9093     case OMPD_teams_distribute_parallel_for_simd:
9094     case OMPD_target_update:
9095     case OMPD_declare_simd:
9096     case OMPD_declare_target:
9097     case OMPD_end_declare_target:
9098     case OMPD_declare_reduction:
9099     case OMPD_declare_mapper:
9100     case OMPD_taskloop:
9101     case OMPD_taskloop_simd:
9102     case OMPD_requires:
9103     case OMPD_unknown:
9104       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
9105     }
9106     return;
9107   }
9108 
9109   if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
9110     if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
9111       return;
9112 
9113     scanForTargetRegionsFunctions(
9114         E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
9115     return;
9116   }
9117 
9118   // If this is a lambda function, look into its body.
9119   if (const auto *L = dyn_cast<LambdaExpr>(S))
9120     S = L->getBody();
9121 
9122   // Keep looking for target regions recursively.
9123   for (const Stmt *II : S->children())
9124     scanForTargetRegionsFunctions(II, ParentName);
9125 }
9126 
9127 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9128   // If emitting code for the host, we do not process FD here. Instead we do
9129   // the normal code generation.
9130   if (!CGM.getLangOpts().OpenMPIsDevice)
9131     return false;
9132 
9133   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9134   StringRef Name = CGM.getMangledName(GD);
9135   // Try to detect target regions in the function.
9136   if (const auto *FD = dyn_cast<FunctionDecl>(VD))
9137     scanForTargetRegionsFunctions(FD->getBody(), Name);
9138 
9139   // Do not to emit function if it is not marked as declare target.
9140   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9141          AlreadyEmittedTargetFunctions.count(Name) == 0;
9142 }
9143 
9144 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9145   if (!CGM.getLangOpts().OpenMPIsDevice)
9146     return false;
9147 
9148   // Check if there are Ctors/Dtors in this declaration and look for target
9149   // regions in it. We use the complete variant to produce the kernel name
9150   // mangling.
9151   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9152   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9153     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9154       StringRef ParentName =
9155           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9156       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9157     }
9158     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9159       StringRef ParentName =
9160           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9161       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9162     }
9163   }
9164 
9165   // Do not to emit variable if it is not marked as declare target.
9166   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9167       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9168           cast<VarDecl>(GD.getDecl()));
9169   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9170       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9171        HasRequiresUnifiedSharedMemory)) {
9172     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9173     return true;
9174   }
9175   return false;
9176 }
9177 
9178 llvm::Constant *
9179 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
9180                                                 const VarDecl *VD) {
9181   assert(VD->getType().isConstant(CGM.getContext()) &&
9182          "Expected constant variable.");
9183   StringRef VarName;
9184   llvm::Constant *Addr;
9185   llvm::GlobalValue::LinkageTypes Linkage;
9186   QualType Ty = VD->getType();
9187   SmallString<128> Buffer;
9188   {
9189     unsigned DeviceID;
9190     unsigned FileID;
9191     unsigned Line;
9192     getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
9193                              FileID, Line);
9194     llvm::raw_svector_ostream OS(Buffer);
9195     OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
9196        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
9197     VarName = OS.str();
9198   }
9199   Linkage = llvm::GlobalValue::InternalLinkage;
9200   Addr =
9201       getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
9202                                   getDefaultFirstprivateAddressSpace());
9203   cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
9204   CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
9205   CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
9206   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9207       VarName, Addr, VarSize,
9208       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
9209   return Addr;
9210 }
9211 
9212 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
9213                                                    llvm::Constant *Addr) {
9214   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9215       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9216   if (!Res) {
9217     if (CGM.getLangOpts().OpenMPIsDevice) {
9218       // Register non-target variables being emitted in device code (debug info
9219       // may cause this).
9220       StringRef VarName = CGM.getMangledName(VD);
9221       EmittedNonTargetVariables.try_emplace(VarName, Addr);
9222     }
9223     return;
9224   }
9225   // Register declare target variables.
9226   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
9227   StringRef VarName;
9228   CharUnits VarSize;
9229   llvm::GlobalValue::LinkageTypes Linkage;
9230 
9231   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9232       !HasRequiresUnifiedSharedMemory) {
9233     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9234     VarName = CGM.getMangledName(VD);
9235     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
9236       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
9237       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
9238     } else {
9239       VarSize = CharUnits::Zero();
9240     }
9241     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
9242     // Temp solution to prevent optimizations of the internal variables.
9243     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
9244       std::string RefName = getName({VarName, "ref"});
9245       if (!CGM.GetGlobalValue(RefName)) {
9246         llvm::Constant *AddrRef =
9247             getOrCreateInternalVariable(Addr->getType(), RefName);
9248         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
9249         GVAddrRef->setConstant(/*Val=*/true);
9250         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
9251         GVAddrRef->setInitializer(Addr);
9252         CGM.addCompilerUsedGlobal(GVAddrRef);
9253       }
9254     }
9255   } else {
9256     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
9257             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9258              HasRequiresUnifiedSharedMemory)) &&
9259            "Declare target attribute must link or to with unified memory.");
9260     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
9261       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
9262     else
9263       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9264 
9265     if (CGM.getLangOpts().OpenMPIsDevice) {
9266       VarName = Addr->getName();
9267       Addr = nullptr;
9268     } else {
9269       VarName = getAddrOfDeclareTargetVar(VD).getName();
9270       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
9271     }
9272     VarSize = CGM.getPointerSize();
9273     Linkage = llvm::GlobalValue::WeakAnyLinkage;
9274   }
9275 
9276   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9277       VarName, Addr, VarSize, Flags, Linkage);
9278 }
9279 
9280 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
9281   if (isa<FunctionDecl>(GD.getDecl()) ||
9282       isa<OMPDeclareReductionDecl>(GD.getDecl()))
9283     return emitTargetFunctions(GD);
9284 
9285   return emitTargetGlobalVariable(GD);
9286 }
9287 
9288 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
9289   for (const VarDecl *VD : DeferredGlobalVariables) {
9290     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9291         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9292     if (!Res)
9293       continue;
9294     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9295         !HasRequiresUnifiedSharedMemory) {
9296       CGM.EmitGlobal(VD);
9297     } else {
9298       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
9299               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9300                HasRequiresUnifiedSharedMemory)) &&
9301              "Expected link clause or to clause with unified memory.");
9302       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
9303     }
9304   }
9305 }
9306 
9307 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
9308     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
9309   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
9310          " Expected target-based directive.");
9311 }
9312 
9313 void CGOpenMPRuntime::checkArchForUnifiedAddressing(
9314     const OMPRequiresDecl *D) {
9315   for (const OMPClause *Clause : D->clauselists()) {
9316     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
9317       HasRequiresUnifiedSharedMemory = true;
9318       break;
9319     }
9320   }
9321 }
9322 
9323 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
9324                                                        LangAS &AS) {
9325   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
9326     return false;
9327   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
9328   switch(A->getAllocatorType()) {
9329   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
9330   // Not supported, fallback to the default mem space.
9331   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
9332   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
9333   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
9334   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
9335   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
9336   case OMPAllocateDeclAttr::OMPConstMemAlloc:
9337   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
9338     AS = LangAS::Default;
9339     return true;
9340   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
9341     llvm_unreachable("Expected predefined allocator for the variables with the "
9342                      "static storage.");
9343   }
9344   return false;
9345 }
9346 
/// Returns the current value of the HasRequiresUnifiedSharedMemory flag,
/// which checkArchForUnifiedAddressing sets when it sees a
/// 'unified_shared_memory' clause on a 'requires' declaration.
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
9350 
9351 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
9352     CodeGenModule &CGM)
9353     : CGM(CGM) {
9354   if (CGM.getLangOpts().OpenMPIsDevice) {
9355     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
9356     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
9357   }
9358 }
9359 
9360 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
9361   if (CGM.getLangOpts().OpenMPIsDevice)
9362     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
9363 }
9364 
9365 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
9366   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
9367     return true;
9368 
9369   StringRef Name = CGM.getMangledName(GD);
9370   const auto *D = cast<FunctionDecl>(GD.getDecl());
9371   // Do not to emit function if it is marked as declare target as it was already
9372   // emitted.
9373   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
9374     if (D->hasBody() && AlreadyEmittedTargetFunctions.count(Name) == 0) {
9375       if (auto *F = dyn_cast_or_null<llvm::Function>(CGM.GetGlobalValue(Name)))
9376         return !F->isDeclaration();
9377       return false;
9378     }
9379     return true;
9380   }
9381 
9382   return !AlreadyEmittedTargetFunctions.insert(Name).second;
9383 }
9384 
9385 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
9386   // If we don't have entries or if we are emitting code for the device, we
9387   // don't need to do anything.
9388   if (CGM.getLangOpts().OMPTargetTriples.empty() ||
9389       CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
9390       (OffloadEntriesInfoManager.empty() &&
9391        !HasEmittedDeclareTargetRegion &&
9392        !HasEmittedTargetRegion))
9393     return nullptr;
9394 
9395   // Create and register the function that handles the requires directives.
9396   ASTContext &C = CGM.getContext();
9397 
9398   llvm::Function *RequiresRegFn;
9399   {
9400     CodeGenFunction CGF(CGM);
9401     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
9402     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
9403     std::string ReqName = getName({"omp_offloading", "requires_reg"});
9404     RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI);
9405     CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
9406     OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
9407     // TODO: check for other requires clauses.
9408     // The requires directive takes effect only when a target region is
9409     // present in the compilation unit. Otherwise it is ignored and not
9410     // passed to the runtime. This avoids the runtime from throwing an error
9411     // for mismatching requires clauses across compilation units that don't
9412     // contain at least 1 target region.
9413     assert((HasEmittedTargetRegion ||
9414             HasEmittedDeclareTargetRegion ||
9415             !OffloadEntriesInfoManager.empty()) &&
9416            "Target or declare target region expected.");
9417     if (HasRequiresUnifiedSharedMemory)
9418       Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
9419     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires),
9420         llvm::ConstantInt::get(CGM.Int64Ty, Flags));
9421     CGF.FinishFunction();
9422   }
9423   return RequiresRegFn;
9424 }
9425 
9426 llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
9427   // If we have offloading in the current module, we need to emit the entries
9428   // now and register the offloading descriptor.
9429   createOffloadEntriesAndInfoMetadata();
9430 
9431   // Create and register the offloading binary descriptors. This is the main
9432   // entity that captures all the information about offloading in the current
9433   // compilation unit.
9434   return createOffloadingBinaryDescriptorRegistration();
9435 }
9436 
9437 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
9438                                     const OMPExecutableDirective &D,
9439                                     SourceLocation Loc,
9440                                     llvm::Function *OutlinedFn,
9441                                     ArrayRef<llvm::Value *> CapturedVars) {
9442   if (!CGF.HaveInsertPoint())
9443     return;
9444 
9445   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9446   CodeGenFunction::RunCleanupsScope Scope(CGF);
9447 
9448   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
9449   llvm::Value *Args[] = {
9450       RTLoc,
9451       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
9452       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
9453   llvm::SmallVector<llvm::Value *, 16> RealArgs;
9454   RealArgs.append(std::begin(Args), std::end(Args));
9455   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
9456 
9457   llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
9458   CGF.EmitRuntimeCall(RTLFn, RealArgs);
9459 }
9460 
9461 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
9462                                          const Expr *NumTeams,
9463                                          const Expr *ThreadLimit,
9464                                          SourceLocation Loc) {
9465   if (!CGF.HaveInsertPoint())
9466     return;
9467 
9468   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9469 
9470   llvm::Value *NumTeamsVal =
9471       NumTeams
9472           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
9473                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
9474           : CGF.Builder.getInt32(0);
9475 
9476   llvm::Value *ThreadLimitVal =
9477       ThreadLimit
9478           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
9479                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
9480           : CGF.Builder.getInt32(0);
9481 
9482   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
9483   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
9484                                      ThreadLimitVal};
9485   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
9486                       PushNumTeamsArgs);
9487 }
9488 
/// Emits a 'target data' region: a __tgt_target_data_begin runtime call, the
/// region body produced by \p CodeGen, and a __tgt_target_data_end call.
///
/// \param CGF     Function being emitted; nothing is emitted if it has no
///                insertion point.
/// \param D       Directive whose map information fills the offloading arrays.
/// \param IfCond  Optional 'if' clause condition. When present, the begin and
///                end calls are each emitted through emitOMPIfClause.
/// \param Device  Optional 'device' clause expression. When absent,
///                OMP_DEVICEID_UNDEF is passed to the runtime.
/// \param CodeGen Generator for the region body.
/// \param Info    Filled by emitOffloadingArrays while opening the region and
///                read again while closing it. When its CaptureDeviceAddrMap
///                is non-empty the body is emitted inside the opening
///                branches instead of between the two runtime calls.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MCHandler(D, CGF);
    MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin),
                        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    // The array arguments are rebuilt from Info rather than reused from the
    // opening lambda, whose locals are out of scope here.
    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    // Note that the device expression is emitted a second time here.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end),
                        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  // Opening of the region: guarded by the 'if' clause when there is one,
  // otherwise emitted unconditionally.
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  // Closing of the region, mirroring the opening above.
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
9615 
/// Emits the runtime call for a standalone data directive: 'target enter
/// data', 'target exit data' or 'target update'.
///
/// \param CGF    Function being emitted; nothing is emitted if it has no
///               insertion point.
/// \param D      The directive. Its kind selects the runtime entry point and a
///               'nowait' clause selects the *_nowait variant; a 'depend'
///               clause routes emission through
///               EmitOMPTargetTaskBasedDirective.
/// \param IfCond Optional 'if' clause condition; when it is present the whole
///               emission is the 'then' branch and the 'else' branch is empty.
/// \param Device Optional 'device' clause expression. When absent,
///               OMP_DEVICEID_UNDEF is passed to the runtime.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // InputInfo and MapTypesArray are filled in by TargetThenGen and read by
  // ThenGen; both lambdas capture them by reference.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo,
                    &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    llvm::Value *OffloadingArgs[] = {DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray};

    // Select the right runtime function call for each expected standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    OpenMPRTLFunction RTLFn;
    // Every directive kind is listed explicitly (there is no default label);
    // only the three standalone data directives assign RTLFn, all others are
    // unreachable.
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
                        : OMPRTL__tgt_target_data_begin;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
                        : OMPRTL__tgt_target_data_end;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
                        : OMPRTL__tgt_target_data_update;
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
  };

  // Builds the offloading arrays, publishes them through InputInfo and
  // MapTypesArray, and then runs ThenGen either inside a task-based directive
  // or inlined.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    // With a 'depend' clause the runtime call is emitted through the
    // task-based path; otherwise it is emitted inline.
    if (D.hasClausesOfKind<OMPDependClause>())
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    // Nothing is emitted on the 'else' side of the 'if' clause.
    emitOMPIfClause(CGF, IfCond, TargetThenGen,
                    [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
9768 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  /// The enumerator order is observable: a value-initialized ParamKindTy
  /// (e.g. `{}`) is LinearWithVarStride, the first enumerator.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    /// Classification of the parameter; defaults to Vector.
    ParamKindTy Kind = Vector;
    /// Value printed after the kind letter when mangling Linear and
    /// LinearWithVarStride parameters.
    llvm::APSInt StrideOrArg;
    /// Alignment value; the mangling routines print it after 'a' only when it
    /// is non-zero.
    llvm::APSInt Alignment;
  };
} // namespace
9779 
9780 static unsigned evaluateCDTSize(const FunctionDecl *FD,
9781                                 ArrayRef<ParamAttrTy> ParamAttrs) {
9782   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
9783   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
9784   // of that clause. The VLEN value must be power of 2.
9785   // In other case the notion of the function`s "characteristic data type" (CDT)
9786   // is used to compute the vector length.
9787   // CDT is defined in the following order:
9788   //   a) For non-void function, the CDT is the return type.
9789   //   b) If the function has any non-uniform, non-linear parameters, then the
9790   //   CDT is the type of the first such parameter.
9791   //   c) If the CDT determined by a) or b) above is struct, union, or class
9792   //   type which is pass-by-value (except for the type that maps to the
9793   //   built-in complex data type), the characteristic data type is int.
9794   //   d) If none of the above three cases is applicable, the CDT is int.
9795   // The VLEN is then determined based on the CDT and the size of vector
9796   // register of that ISA for which current vector version is generated. The
9797   // VLEN is computed using the formula below:
9798   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
9799   // where vector register size specified in section 3.2.1 Registers and the
9800   // Stack Frame of original AMD64 ABI document.
9801   QualType RetType = FD->getReturnType();
9802   if (RetType.isNull())
9803     return 0;
9804   ASTContext &C = FD->getASTContext();
9805   QualType CDT;
9806   if (!RetType.isNull() && !RetType->isVoidType()) {
9807     CDT = RetType;
9808   } else {
9809     unsigned Offset = 0;
9810     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
9811       if (ParamAttrs[Offset].Kind == Vector)
9812         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
9813       ++Offset;
9814     }
9815     if (CDT.isNull()) {
9816       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
9817         if (ParamAttrs[I + Offset].Kind == Vector) {
9818           CDT = FD->getParamDecl(I)->getType();
9819           break;
9820         }
9821       }
9822     }
9823   }
9824   if (CDT.isNull())
9825     CDT = C.IntTy;
9826   CDT = CDT->getCanonicalTypeUnqualified();
9827   if (CDT->isRecordType() || CDT->isUnionType())
9828     CDT = C.IntTy;
9829   return C.getTypeSize(CDT);
9830 }
9831 
9832 static void
9833 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
9834                            const llvm::APSInt &VLENVal,
9835                            ArrayRef<ParamAttrTy> ParamAttrs,
9836                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
9837   struct ISADataTy {
9838     char ISA;
9839     unsigned VecRegSize;
9840   };
9841   ISADataTy ISAData[] = {
9842       {
9843           'b', 128
9844       }, // SSE
9845       {
9846           'c', 256
9847       }, // AVX
9848       {
9849           'd', 256
9850       }, // AVX2
9851       {
9852           'e', 512
9853       }, // AVX512
9854   };
9855   llvm::SmallVector<char, 2> Masked;
9856   switch (State) {
9857   case OMPDeclareSimdDeclAttr::BS_Undefined:
9858     Masked.push_back('N');
9859     Masked.push_back('M');
9860     break;
9861   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
9862     Masked.push_back('N');
9863     break;
9864   case OMPDeclareSimdDeclAttr::BS_Inbranch:
9865     Masked.push_back('M');
9866     break;
9867   }
9868   for (char Mask : Masked) {
9869     for (const ISADataTy &Data : ISAData) {
9870       SmallString<256> Buffer;
9871       llvm::raw_svector_ostream Out(Buffer);
9872       Out << "_ZGV" << Data.ISA << Mask;
9873       if (!VLENVal) {
9874         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
9875         assert(NumElts && "Non-zero simdlen/cdtsize expected");
9876         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
9877       } else {
9878         Out << VLENVal;
9879       }
9880       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
9881         switch (ParamAttr.Kind){
9882         case LinearWithVarStride:
9883           Out << 's' << ParamAttr.StrideOrArg;
9884           break;
9885         case Linear:
9886           Out << 'l';
9887           if (!!ParamAttr.StrideOrArg)
9888             Out << ParamAttr.StrideOrArg;
9889           break;
9890         case Uniform:
9891           Out << 'u';
9892           break;
9893         case Vector:
9894           Out << 'v';
9895           break;
9896         }
9897         if (!!ParamAttr.Alignment)
9898           Out << 'a' << ParamAttr.Alignment;
9899       }
9900       Out << '_' << Fn->getName();
9901       Fn->addFnAttr(Out.str());
9902     }
9903   }
9904 }
9905 
// These are the functions needed to mangle the names of the vector
// functions generated by the compiler, according to the rules defined
// in the "Vector Function ABI specifications for AArch64", available
// at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
9911 
9912 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
9913 ///
9914 /// TODO: Need to implement the behavior for reference marked with a
9915 /// var or no linear modifiers (1.b in the section). For this, we
9916 /// need to extend ParamKindTy to support the linear modifiers.
9917 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
9918   QT = QT.getCanonicalType();
9919 
9920   if (QT->isVoidType())
9921     return false;
9922 
9923   if (Kind == ParamKindTy::Uniform)
9924     return false;
9925 
9926   if (Kind == ParamKindTy::Linear)
9927     return false;
9928 
9929   // TODO: Handle linear references with modifiers
9930 
9931   if (Kind == ParamKindTy::LinearWithVarStride)
9932     return false;
9933 
9934   return true;
9935 }
9936 
9937 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
9938 static bool getAArch64PBV(QualType QT, ASTContext &C) {
9939   QT = QT.getCanonicalType();
9940   unsigned Size = C.getTypeSize(QT);
9941 
9942   // Only scalars and complex within 16 bytes wide set PVB to true.
9943   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
9944     return false;
9945 
9946   if (QT->isFloatingType())
9947     return true;
9948 
9949   if (QT->isIntegerType())
9950     return true;
9951 
9952   if (QT->isPointerType())
9953     return true;
9954 
9955   // TODO: Add support for complex types (section 3.1.2, item 2).
9956 
9957   return false;
9958 }
9959 
9960 /// Computes the lane size (LS) of a return type or of an input parameter,
9961 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
9962 /// TODO: Add support for references, section 3.2.1, item 1.
9963 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
9964   if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
9965     QualType PTy = QT.getCanonicalType()->getPointeeType();
9966     if (getAArch64PBV(PTy, C))
9967       return C.getTypeSize(PTy);
9968   }
9969   if (getAArch64PBV(QT, C))
9970     return C.getTypeSize(QT);
9971 
9972   return C.getTypeSize(C.getUIntPtrType());
9973 }
9974 
9975 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
9976 // signature of the scalar function, as defined in 3.2.2 of the
9977 // AAVFABI.
9978 static std::tuple<unsigned, unsigned, bool>
9979 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
9980   QualType RetType = FD->getReturnType().getCanonicalType();
9981 
9982   ASTContext &C = FD->getASTContext();
9983 
9984   bool OutputBecomesInput = false;
9985 
9986   llvm::SmallVector<unsigned, 8> Sizes;
9987   if (!RetType->isVoidType()) {
9988     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
9989     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
9990       OutputBecomesInput = true;
9991   }
9992   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
9993     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
9994     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
9995   }
9996 
9997   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
9998   // The LS of a function parameter / return value can only be a power
9999   // of 2, starting from 8 bits, up to 128.
10000   assert(std::all_of(Sizes.begin(), Sizes.end(),
10001                      [](unsigned Size) {
10002                        return Size == 8 || Size == 16 || Size == 32 ||
10003                               Size == 64 || Size == 128;
10004                      }) &&
10005          "Invalid size");
10006 
10007   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10008                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
10009                          OutputBecomesInput);
10010 }
10011 
10012 /// Mangle the parameter part of the vector function name according to
10013 /// their OpenMP classification. The mangling function is defined in
10014 /// section 3.5 of the AAVFABI.
10015 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10016   SmallString<256> Buffer;
10017   llvm::raw_svector_ostream Out(Buffer);
10018   for (const auto &ParamAttr : ParamAttrs) {
10019     switch (ParamAttr.Kind) {
10020     case LinearWithVarStride:
10021       Out << "ls" << ParamAttr.StrideOrArg;
10022       break;
10023     case Linear:
10024       Out << 'l';
10025       // Don't print the step value if it is not present or if it is
10026       // equal to 1.
10027       if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1)
10028         Out << ParamAttr.StrideOrArg;
10029       break;
10030     case Uniform:
10031       Out << 'u';
10032       break;
10033     case Vector:
10034       Out << 'v';
10035       break;
10036     }
10037 
10038     if (!!ParamAttr.Alignment)
10039       Out << 'a' << ParamAttr.Alignment;
10040   }
10041 
10042   return Out.str();
10043 }
10044 
10045 // Function used to add the attribute. The parameter `VLEN` is
10046 // templated to allow the use of "x" when targeting scalable functions
10047 // for SVE.
10048 template <typename T>
10049 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10050                                  char ISA, StringRef ParSeq,
10051                                  StringRef MangledName, bool OutputBecomesInput,
10052                                  llvm::Function *Fn) {
10053   SmallString<256> Buffer;
10054   llvm::raw_svector_ostream Out(Buffer);
10055   Out << Prefix << ISA << LMask << VLEN;
10056   if (OutputBecomesInput)
10057     Out << "v";
10058   Out << ParSeq << "_" << MangledName;
10059   Fn->addFnAttr(Out.str());
10060 }
10061 
10062 // Helper function to generate the Advanced SIMD names depending on
10063 // the value of the NDS when simdlen is not present.
10064 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10065                                       StringRef Prefix, char ISA,
10066                                       StringRef ParSeq, StringRef MangledName,
10067                                       bool OutputBecomesInput,
10068                                       llvm::Function *Fn) {
10069   switch (NDS) {
10070   case 8:
10071     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10072                          OutputBecomesInput, Fn);
10073     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10074                          OutputBecomesInput, Fn);
10075     break;
10076   case 16:
10077     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10078                          OutputBecomesInput, Fn);
10079     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10080                          OutputBecomesInput, Fn);
10081     break;
10082   case 32:
10083     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10084                          OutputBecomesInput, Fn);
10085     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10086                          OutputBecomesInput, Fn);
10087     break;
10088   case 64:
10089   case 128:
10090     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10091                          OutputBecomesInput, Fn);
10092     break;
10093   default:
10094     llvm_unreachable("Scalar type is too wide.");
10095   }
10096 }
10097 
10098 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
10099 static void emitAArch64DeclareSimdFunction(
10100     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
10101     ArrayRef<ParamAttrTy> ParamAttrs,
10102     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
10103     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
10104 
10105   // Get basic data for building the vector signature.
10106   const auto Data = getNDSWDS(FD, ParamAttrs);
10107   const unsigned NDS = std::get<0>(Data);
10108   const unsigned WDS = std::get<1>(Data);
10109   const bool OutputBecomesInput = std::get<2>(Data);
10110 
10111   // Check the values provided via `simdlen` by the user.
10112   // 1. A `simdlen(1)` doesn't produce vector signatures,
10113   if (UserVLEN == 1) {
10114     unsigned DiagID = CGM.getDiags().getCustomDiagID(
10115         DiagnosticsEngine::Warning,
10116         "The clause simdlen(1) has no effect when targeting aarch64.");
10117     CGM.getDiags().Report(SLoc, DiagID);
10118     return;
10119   }
10120 
10121   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
10122   // Advanced SIMD output.
10123   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
10124     unsigned DiagID = CGM.getDiags().getCustomDiagID(
10125         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
10126                                     "power of 2 when targeting Advanced SIMD.");
10127     CGM.getDiags().Report(SLoc, DiagID);
10128     return;
10129   }
10130 
10131   // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
10132   // limits.
10133   if (ISA == 's' && UserVLEN != 0) {
10134     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
10135       unsigned DiagID = CGM.getDiags().getCustomDiagID(
10136           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
10137                                       "lanes in the architectural constraints "
10138                                       "for SVE (min is 128-bit, max is "
10139                                       "2048-bit, by steps of 128-bit)");
10140       CGM.getDiags().Report(SLoc, DiagID) << WDS;
10141       return;
10142     }
10143   }
10144 
10145   // Sort out parameter sequence.
10146   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
10147   StringRef Prefix = "_ZGV";
10148   // Generate simdlen from user input (if any).
10149   if (UserVLEN) {
10150     if (ISA == 's') {
10151       // SVE generates only a masked function.
10152       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10153                            OutputBecomesInput, Fn);
10154     } else {
10155       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10156       // Advanced SIMD generates one or two functions, depending on
10157       // the `[not]inbranch` clause.
10158       switch (State) {
10159       case OMPDeclareSimdDeclAttr::BS_Undefined:
10160         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10161                              OutputBecomesInput, Fn);
10162         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10163                              OutputBecomesInput, Fn);
10164         break;
10165       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10166         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10167                              OutputBecomesInput, Fn);
10168         break;
10169       case OMPDeclareSimdDeclAttr::BS_Inbranch:
10170         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10171                              OutputBecomesInput, Fn);
10172         break;
10173       }
10174     }
10175   } else {
10176     // If no user simdlen is provided, follow the AAVFABI rules for
10177     // generating the vector length.
10178     if (ISA == 's') {
10179       // SVE, section 3.4.1, item 1.
10180       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
10181                            OutputBecomesInput, Fn);
10182     } else {
10183       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10184       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
10185       // two vector names depending on the use of the clause
10186       // `[not]inbranch`.
10187       switch (State) {
10188       case OMPDeclareSimdDeclAttr::BS_Undefined:
10189         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10190                                   OutputBecomesInput, Fn);
10191         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10192                                   OutputBecomesInput, Fn);
10193         break;
10194       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10195         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10196                                   OutputBecomesInput, Fn);
10197         break;
10198       case OMPDeclareSimdDeclAttr::BS_Inbranch:
10199         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10200                                   OutputBecomesInput, Fn);
10201         break;
10202       }
10203     }
10204   }
10205 }
10206 
10207 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
10208                                               llvm::Function *Fn) {
10209   ASTContext &C = CGM.getContext();
10210   FD = FD->getMostRecentDecl();
10211   // Map params to their positions in function decl.
10212   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
10213   if (isa<CXXMethodDecl>(FD))
10214     ParamPositions.try_emplace(FD, 0);
10215   unsigned ParamPos = ParamPositions.size();
10216   for (const ParmVarDecl *P : FD->parameters()) {
10217     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
10218     ++ParamPos;
10219   }
10220   while (FD) {
10221     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
10222       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
10223       // Mark uniform parameters.
10224       for (const Expr *E : Attr->uniforms()) {
10225         E = E->IgnoreParenImpCasts();
10226         unsigned Pos;
10227         if (isa<CXXThisExpr>(E)) {
10228           Pos = ParamPositions[FD];
10229         } else {
10230           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10231                                 ->getCanonicalDecl();
10232           Pos = ParamPositions[PVD];
10233         }
10234         ParamAttrs[Pos].Kind = Uniform;
10235       }
10236       // Get alignment info.
10237       auto NI = Attr->alignments_begin();
10238       for (const Expr *E : Attr->aligneds()) {
10239         E = E->IgnoreParenImpCasts();
10240         unsigned Pos;
10241         QualType ParmTy;
10242         if (isa<CXXThisExpr>(E)) {
10243           Pos = ParamPositions[FD];
10244           ParmTy = E->getType();
10245         } else {
10246           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10247                                 ->getCanonicalDecl();
10248           Pos = ParamPositions[PVD];
10249           ParmTy = PVD->getType();
10250         }
10251         ParamAttrs[Pos].Alignment =
10252             (*NI)
10253                 ? (*NI)->EvaluateKnownConstInt(C)
10254                 : llvm::APSInt::getUnsigned(
10255                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
10256                           .getQuantity());
10257         ++NI;
10258       }
10259       // Mark linear parameters.
10260       auto SI = Attr->steps_begin();
10261       auto MI = Attr->modifiers_begin();
10262       for (const Expr *E : Attr->linears()) {
10263         E = E->IgnoreParenImpCasts();
10264         unsigned Pos;
10265         if (isa<CXXThisExpr>(E)) {
10266           Pos = ParamPositions[FD];
10267         } else {
10268           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10269                                 ->getCanonicalDecl();
10270           Pos = ParamPositions[PVD];
10271         }
10272         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
10273         ParamAttr.Kind = Linear;
10274         if (*SI) {
10275           Expr::EvalResult Result;
10276           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
10277             if (const auto *DRE =
10278                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
10279               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
10280                 ParamAttr.Kind = LinearWithVarStride;
10281                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
10282                     ParamPositions[StridePVD->getCanonicalDecl()]);
10283               }
10284             }
10285           } else {
10286             ParamAttr.StrideOrArg = Result.Val.getInt();
10287           }
10288         }
10289         ++SI;
10290         ++MI;
10291       }
10292       llvm::APSInt VLENVal;
10293       SourceLocation ExprLoc;
10294       const Expr *VLENExpr = Attr->getSimdlen();
10295       if (VLENExpr) {
10296         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
10297         ExprLoc = VLENExpr->getExprLoc();
10298       }
10299       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
10300       if (CGM.getTriple().getArch() == llvm::Triple::x86 ||
10301           CGM.getTriple().getArch() == llvm::Triple::x86_64) {
10302         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
10303       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
10304         unsigned VLEN = VLENVal.getExtValue();
10305         StringRef MangledName = Fn->getName();
10306         if (CGM.getTarget().hasFeature("sve"))
10307           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10308                                          MangledName, 's', 128, Fn, ExprLoc);
10309         if (CGM.getTarget().hasFeature("neon"))
10310           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10311                                          MangledName, 'n', 128, Fn, ExprLoc);
10312       }
10313     }
10314     FD = FD->getPreviousDecl();
10315   }
10316 }
10317 
10318 namespace {
10319 /// Cleanup action for doacross support.
10320 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
10321 public:
10322   static const int DoacrossFinArgs = 2;
10323 
10324 private:
10325   llvm::FunctionCallee RTLFn;
10326   llvm::Value *Args[DoacrossFinArgs];
10327 
10328 public:
10329   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
10330                     ArrayRef<llvm::Value *> CallArgs)
10331       : RTLFn(RTLFn) {
10332     assert(CallArgs.size() == DoacrossFinArgs);
10333     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10334   }
10335   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
10336     if (!CGF.HaveInsertPoint())
10337       return;
10338     CGF.EmitRuntimeCall(RTLFn, Args);
10339   }
10340 };
10341 } // namespace
10342 
10343 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
10344                                        const OMPLoopDirective &D,
10345                                        ArrayRef<Expr *> NumIterations) {
10346   if (!CGF.HaveInsertPoint())
10347     return;
10348 
10349   ASTContext &C = CGM.getContext();
10350   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
10351   RecordDecl *RD;
10352   if (KmpDimTy.isNull()) {
10353     // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
10354     //  kmp_int64 lo; // lower
10355     //  kmp_int64 up; // upper
10356     //  kmp_int64 st; // stride
10357     // };
10358     RD = C.buildImplicitRecord("kmp_dim");
10359     RD->startDefinition();
10360     addFieldToRecordDecl(C, RD, Int64Ty);
10361     addFieldToRecordDecl(C, RD, Int64Ty);
10362     addFieldToRecordDecl(C, RD, Int64Ty);
10363     RD->completeDefinition();
10364     KmpDimTy = C.getRecordType(RD);
10365   } else {
10366     RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
10367   }
10368   llvm::APInt Size(/*numBits=*/32, NumIterations.size());
10369   QualType ArrayTy =
10370       C.getConstantArrayType(KmpDimTy, Size, ArrayType::Normal, 0);
10371 
10372   Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
10373   CGF.EmitNullInitialization(DimsAddr, ArrayTy);
10374   enum { LowerFD = 0, UpperFD, StrideFD };
10375   // Fill dims with data.
10376   for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
10377     LValue DimsLVal = CGF.MakeAddrLValue(
10378         CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
10379     // dims.upper = num_iterations;
10380     LValue UpperLVal = CGF.EmitLValueForField(
10381         DimsLVal, *std::next(RD->field_begin(), UpperFD));
10382     llvm::Value *NumIterVal =
10383         CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]),
10384                                  D.getNumIterations()->getType(), Int64Ty,
10385                                  D.getNumIterations()->getExprLoc());
10386     CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
10387     // dims.stride = 1;
10388     LValue StrideLVal = CGF.EmitLValueForField(
10389         DimsLVal, *std::next(RD->field_begin(), StrideFD));
10390     CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
10391                           StrideLVal);
10392   }
10393 
10394   // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
10395   // kmp_int32 num_dims, struct kmp_dim * dims);
10396   llvm::Value *Args[] = {
10397       emitUpdateLocation(CGF, D.getBeginLoc()),
10398       getThreadID(CGF, D.getBeginLoc()),
10399       llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
10400       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
10401           CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
10402           CGM.VoidPtrTy)};
10403 
10404   llvm::FunctionCallee RTLFn =
10405       createRuntimeFunction(OMPRTL__kmpc_doacross_init);
10406   CGF.EmitRuntimeCall(RTLFn, Args);
10407   llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
10408       emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
10409   llvm::FunctionCallee FiniRTLFn =
10410       createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
10411   CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
10412                                              llvm::makeArrayRef(FiniArgs));
10413 }
10414 
10415 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
10416                                           const OMPDependClause *C) {
10417   QualType Int64Ty =
10418       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
10419   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
10420   QualType ArrayTy = CGM.getContext().getConstantArrayType(
10421       Int64Ty, Size, ArrayType::Normal, 0);
10422   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
10423   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
10424     const Expr *CounterVal = C->getLoopData(I);
10425     assert(CounterVal);
10426     llvm::Value *CntVal = CGF.EmitScalarConversion(
10427         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
10428         CounterVal->getExprLoc());
10429     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
10430                           /*Volatile=*/false, Int64Ty);
10431   }
10432   llvm::Value *Args[] = {
10433       emitUpdateLocation(CGF, C->getBeginLoc()),
10434       getThreadID(CGF, C->getBeginLoc()),
10435       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
10436   llvm::FunctionCallee RTLFn;
10437   if (C->getDependencyKind() == OMPC_DEPEND_source) {
10438     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
10439   } else {
10440     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
10441     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
10442   }
10443   CGF.EmitRuntimeCall(RTLFn, Args);
10444 }
10445 
10446 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
10447                                llvm::FunctionCallee Callee,
10448                                ArrayRef<llvm::Value *> Args) const {
10449   assert(Loc.isValid() && "Outlined function call location must be valid.");
10450   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
10451 
10452   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
10453     if (Fn->doesNotThrow()) {
10454       CGF.EmitNounwindRuntimeCall(Fn, Args);
10455       return;
10456     }
10457   }
10458   CGF.EmitRuntimeCall(Callee, Args);
10459 }
10460 
/// Emit a call to the outlined function \p OutlinedFn with \p Args. This
/// implementation simply forwards to emitCall().
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
10466 
10467 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
10468   if (const auto *FD = dyn_cast<FunctionDecl>(D))
10469     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
10470       HasEmittedDeclareTargetRegion = true;
10471 }
10472 
/// Return the address of the local variable for \p NativeParam.
/// \p TargetParam is not used by this implementation.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
10478 
10479 namespace {
10480 /// Cleanup action for allocate support.
10481 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
10482 public:
10483   static const int CleanupArgs = 3;
10484 
10485 private:
10486   llvm::FunctionCallee RTLFn;
10487   llvm::Value *Args[CleanupArgs];
10488 
10489 public:
10490   OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
10491                        ArrayRef<llvm::Value *> CallArgs)
10492       : RTLFn(RTLFn) {
10493     assert(CallArgs.size() == CleanupArgs &&
10494            "Size of arguments does not match.");
10495     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10496   }
10497   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
10498     if (!CGF.HaveInsertPoint())
10499       return;
10500     CGF.EmitRuntimeCall(RTLFn, Args);
10501   }
10502 };
10503 } // namespace
10504 
10505 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
10506                                                    const VarDecl *VD) {
10507   if (!VD)
10508     return Address::invalid();
10509   const VarDecl *CVD = VD->getCanonicalDecl();
10510   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
10511     return Address::invalid();
10512   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
10513   // Use the default allocation.
10514   if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
10515       !AA->getAllocator())
10516     return Address::invalid();
10517   llvm::Value *Size;
10518   CharUnits Align = CGM.getContext().getDeclAlign(CVD);
10519   if (CVD->getType()->isVariablyModifiedType()) {
10520     Size = CGF.getTypeSize(CVD->getType());
10521     // Align the size: ((size + align - 1) / align) * align
10522     Size = CGF.Builder.CreateNUWAdd(
10523         Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
10524     Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
10525     Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
10526   } else {
10527     CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
10528     Size = CGM.getSize(Sz.alignTo(Align));
10529   }
10530   llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
10531   assert(AA->getAllocator() &&
10532          "Expected allocator expression for non-default allocator.");
10533   llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
10534   // According to the standard, the original allocator type is a enum (integer).
10535   // Convert to pointer type, if required.
10536   if (Allocator->getType()->isIntegerTy())
10537     Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
10538   else if (Allocator->getType()->isPointerTy())
10539     Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
10540                                                                 CGM.VoidPtrTy);
10541   llvm::Value *Args[] = {ThreadID, Size, Allocator};
10542 
10543   llvm::Value *Addr =
10544       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args,
10545                           CVD->getName() + ".void.addr");
10546   llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
10547                                                               Allocator};
10548   llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);
10549 
10550   CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
10551                                                 llvm::makeArrayRef(FiniArgs));
10552   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
10553       Addr,
10554       CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
10555       CVD->getName() + ".addr");
10556   return Address(Addr, Align);
10557 }
10558 
/// Not supported in SIMD-only mode; this function must never be reached.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10564 
/// Not supported in SIMD-only mode; this function must never be reached.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10570 
/// Not supported in SIMD-only mode; this function must never be reached.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10578 
/// Not supported in SIMD-only mode; this function must never be reached.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10586 
/// Not supported in SIMD-only mode; this function must never be reached.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10593 
/// Not supported in SIMD-only mode; this function must never be reached.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10599 
/// Not supported in SIMD-only mode; this function must never be reached.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10604 
/// Not supported in SIMD-only mode; this function must never be reached.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10610 
/// Not supported in SIMD-only mode; this function must never be reached.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10618 
/// Not supported in SIMD-only mode; this function must never be reached.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10625 
/// Not supported in SIMD-only mode; this function must never be reached.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10633 
/// Not supported in SIMD-only mode; this function must never be reached.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10640 
/// Not supported in SIMD-only mode; this function must never be reached.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10646 
/// Not supported in SIMD-only mode; this function must never be reached.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10652 
/// Not supported in SIMD-only mode; this function must never be reached.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10659 
/// Not supported in SIMD-only mode; this function must never be reached.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10665 
/// Not supported in SIMD-only mode; this function must never be reached.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10673 
/// Not supported in SIMD-only mode; this function must never be reached.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10679 
/// Not supported in SIMD-only mode; this function must never be reached.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             OpenMPProcBindClauseKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10685 
/// Not supported in SIMD-only mode; this function must never be reached.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10692 
/// Not supported in SIMD-only mode; this function must never be reached.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10698 
/// Not supported in SIMD-only mode; this function must never be reached.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10703 
/// Not supported in SIMD-only mode; this function must never be reached.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10709 
/// Not supported in SIMD-only mode; this function must never be reached.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10718 
10719 void CGOpenMPSIMDRuntime::emitTaskLoopCall(
10720     CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
10721     llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
10722     const Expr *IfCond, const OMPTaskDataTy &Data) {
10723   llvm_unreachable("Not supported in SIMD-only mode");
10724 }
10725 
10726 void CGOpenMPSIMDRuntime::emitReduction(
10727     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
10728     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
10729     ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
10730   assert(Options.SimpleReduction && "Only simple reduction is expected.");
10731   CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
10732                                  ReductionOps, Options);
10733 }
10734 
10735 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
10736     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
10737     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
10738   llvm_unreachable("Not supported in SIMD-only mode");
10739 }
10740 
10741 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
10742                                                   SourceLocation Loc,
10743                                                   ReductionCodeGen &RCG,
10744                                                   unsigned N) {
10745   llvm_unreachable("Not supported in SIMD-only mode");
10746 }
10747 
10748 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
10749                                                   SourceLocation Loc,
10750                                                   llvm::Value *ReductionsPtr,
10751                                                   LValue SharedLVal) {
10752   llvm_unreachable("Not supported in SIMD-only mode");
10753 }
10754 
10755 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
10756                                            SourceLocation Loc) {
10757   llvm_unreachable("Not supported in SIMD-only mode");
10758 }
10759 
10760 void CGOpenMPSIMDRuntime::emitCancellationPointCall(
10761     CodeGenFunction &CGF, SourceLocation Loc,
10762     OpenMPDirectiveKind CancelRegion) {
10763   llvm_unreachable("Not supported in SIMD-only mode");
10764 }
10765 
10766 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
10767                                          SourceLocation Loc, const Expr *IfCond,
10768                                          OpenMPDirectiveKind CancelRegion) {
10769   llvm_unreachable("Not supported in SIMD-only mode");
10770 }
10771 
10772 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
10773     const OMPExecutableDirective &D, StringRef ParentName,
10774     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
10775     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
10776   llvm_unreachable("Not supported in SIMD-only mode");
10777 }
10778 
10779 void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF,
10780                                          const OMPExecutableDirective &D,
10781                                          llvm::Function *OutlinedFn,
10782                                          llvm::Value *OutlinedFnID,
10783                                          const Expr *IfCond,
10784                                          const Expr *Device) {
10785   llvm_unreachable("Not supported in SIMD-only mode");
10786 }
10787 
10788 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
10789   llvm_unreachable("Not supported in SIMD-only mode");
10790 }
10791 
10792 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10793   llvm_unreachable("Not supported in SIMD-only mode");
10794 }
10795 
10796 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
10797   return false;
10798 }
10799 
10800 llvm::Function *CGOpenMPSIMDRuntime::emitRegistrationFunction() {
10801   return nullptr;
10802 }
10803 
10804 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
10805                                         const OMPExecutableDirective &D,
10806                                         SourceLocation Loc,
10807                                         llvm::Function *OutlinedFn,
10808                                         ArrayRef<llvm::Value *> CapturedVars) {
10809   llvm_unreachable("Not supported in SIMD-only mode");
10810 }
10811 
10812 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10813                                              const Expr *NumTeams,
10814                                              const Expr *ThreadLimit,
10815                                              SourceLocation Loc) {
10816   llvm_unreachable("Not supported in SIMD-only mode");
10817 }
10818 
10819 void CGOpenMPSIMDRuntime::emitTargetDataCalls(
10820     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10821     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
10822   llvm_unreachable("Not supported in SIMD-only mode");
10823 }
10824 
10825 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
10826     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10827     const Expr *Device) {
10828   llvm_unreachable("Not supported in SIMD-only mode");
10829 }
10830 
10831 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
10832                                            const OMPLoopDirective &D,
10833                                            ArrayRef<Expr *> NumIterations) {
10834   llvm_unreachable("Not supported in SIMD-only mode");
10835 }
10836 
10837 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
10838                                               const OMPDependClause *C) {
10839   llvm_unreachable("Not supported in SIMD-only mode");
10840 }
10841 
10842 const VarDecl *
10843 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
10844                                         const VarDecl *NativeParam) const {
10845   llvm_unreachable("Not supported in SIMD-only mode");
10846 }
10847 
10848 Address
10849 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
10850                                          const VarDecl *NativeParam,
10851                                          const VarDecl *TargetParam) const {
10852   llvm_unreachable("Not supported in SIMD-only mode");
10853 }
10854